2019 H3 HA

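This notebook combines the per-serum escape scores into one summary file. The scores are filtered using the following parameters, specified in data/polyclonal_config.yaml:

* Functional effect threshold (keep mutations with functional effect ≥ -1.38)
* Minimum times seen (keep mutations with times seen ≥ 3)
* Allowed amino acids (all except stop codons)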

[1]:

import altair as alt

import pandas as pd

import polyclonal

import yaml

Read the config file and get the filtering parameters

[2]:

# Load the default plot settings; the filter thresholds used by the
# queries further down are stored alongside them.
with open('data/polyclonal_config.yaml') as f:
    config = yaml.safe_load(f)['overall_default']['plot_kwargs']

# Both thresholds sit under the same sub-mapping, so look it up once.
slider_stats = config['addtl_slider_stats']
func_effect = slider_stats['functional effect']
times_seen = slider_stats['times_seen']

# Collection of mutant identities that are allowed through the filter.
aa_list = config['alphabet']

Get functional effects

[3]:

muteffects_csv = "results/muteffects_functional/muteffects_observed.csv"

# Rename to the column names used by the merge and the threshold filter
# further down, then keep only those three columns.
muteffects = (
    pd.read_csv(muteffects_csv)
    .rename(columns={"reference_site": "site", "effect": "functional effect"})
    .loc[:, ["site", "mutant", "functional effect"]]
)

Define samples in each age cohort

[4]:

# Cohort label -> serum identifiers belonging to that cohort.
# Each identifier is used to build the path of that serum's escape CSV,
# and the insertion order here fixes the row order of the combined table.
cohort_dict = {
    '2-5_years': [
        '3944', '2389', '2323', '2388',
        '3973', '4299', '4584', '2367',
    ],
    '15-20_years': [
        '2350', '2365', '2382', '3866',
        '2380', '3856', '3857', '3862',
    ],
    '40-45_years': [
        '33C', '34C', '197C', '199C', '215C',
        '210C', '74C', '68C', '150C', '18C',
    ],
    'infant': ['2462'],
    '68_years': ['AUSAB-13'],
}

Read the library-averaged escape data frame for each serum, filter it by the parameters defined above, and combine the results into one summary escape data frame.

[5]:

# Build one filtered escape table per serum, tagged with its serum and
# cohort, then stack them into a single frame.
escape_df_list = []

for cohort, serum_list in cohort_dict.items():
    for serum in serum_list:
        df = (
            pd.read_csv(f'results/antibody_escape/{serum}_avg.csv')
            .query("`times_seen` >= @times_seen")
            .query("`mutant` in @aa_list")
            # Left merge: mutations with no entry in `muteffects` get NaN for
            # `functional effect`; the threshold query below then removes
            # them, because a NaN comparison evaluates to False.
            .merge(muteffects, how='left', on=['site', 'mutant'])
            .query("`functional effect` >= @func_effect")
            # Tag the rows inside the chain rather than mutating the
            # filtered frame afterwards.
            .assign(serum=serum, cohort=cohort)
            # drop extraneous columns
            .drop(columns=['epitope', 'escape_median', 'escape_min_magnitude'])
        )

        escape_df_list.append(df)

# NOTE: the index is not reset, so each serum's row labels are carried over
# and repeat across sera in the combined frame.
escape_df = pd.concat(escape_df_list)

[6]:

# Destination for the combined, filtered escape scores.
output_csv = 'results/full_hk19_escape_scores.csv'

print(f'Writing to {output_csv}')
# index=False keeps the (repeating) row labels out of the CSV.
escape_df.to_csv(path_or_buf=output_csv, index=False)

# Show the combined table as the cell output.
escape_df

Writing to results/full_hk19_escape_scores.csv

[6]:

	site	wildtype	mutant	mutation	escape_mean	escape_std	n_models	times_seen	frac_models	functional effect	serum	cohort
0	-2	D	G	D-2G	0.1278	0.0473	2	3.0	1.0	-0.6452	3944	2-5_years
1	-2	D	Y	D-2Y	0.0338	0.0501	2	7.0	1.0	-0.7111	3944	2-5_years
2	1	Q	H	Q1H	0.0069	0.1994	2	3.0	1.0	-0.1690	3944	2-5_years
3	1	Q	R	Q1R	-0.0235	0.1103	2	5.0	1.0	-0.6300	3944	2-5_years
4	2	K	N	K2N	-0.0178	0.0990	2	5.0	1.0	-0.1303	3944	2-5_years
...	...	...	...	...	...	...	...	...	...	...	...	...
2765	538	A	T	A538T	-0.0144	0.0011	2	3.0	1.0	-0.0882	AUSAB-13	68_years
2766	538	A	V	A538V	-0.0129	0.0038	2	9.5	1.0	-0.4286	AUSAB-13	68_years
2767	540	Q	H	Q540H	0.0298	0.0467	2	5.0	1.0	-0.1289	AUSAB-13	68_years
2768	540	Q	K	Q540K	-0.0069	0.0640	2	8.5	1.0	-0.6159	AUSAB-13	68_years
2769	540	Q	R	Q540R	-0.0347	0.0355	2	8.5	1.0	-1.3188	AUSAB-13	68_years

69633 rows × 12 columns