Process data from Perth/2009 HA

Some of these data are taken from a previously published study (Lee et al. (2019)); the rest are unpublished data from Rachel Eguia and Juhye Lee, available in this repository: https://github.com/jbloomlab/map_flu_serum_Vietnam_H3_Perth2009.

The Eguia samples were all collected in Vietnam in 2010-2011.

Import Python modules:

[1]:
import altair as alt

import pandas as pd

import polyclonal.plot

Read the data:

[2]:
# Input CSVs keyed by the dataset label attached to their rows; the order here
# is the order in which the rows are stacked.
csv_by_dataset = {
    "Eguia": "results/perth2009/eguia_avg_sel_tidy.csv",
    "Lee2019": "results/perth2009/lee_avg_sel_tidy.csv",
}

dataset_frames = []
for dataset_label, csv_path in csv_by_dataset.items():
    frame = pd.read_csv(csv_path, low_memory=False)
    dataset_frames.append(frame.assign(dataset=dataset_label))

avg_sel_tidy = pd.concat(dataset_frames, ignore_index=True)

# Sanity check on sample labels: the number of distinct formatted names, the
# number of distinct sera, and the number of (serum, dataset) groups must agree.
n_formatted_names = avg_sel_tidy["serum_name_formatted"].nunique()
n_sera = avg_sel_tidy["serum"].nunique()
n_serum_dataset_groups = len(avg_sel_tidy.groupby(["serum", "dataset"]))
assert n_formatted_names == n_sera == n_serum_dataset_groups

Make a dictionary to rename samples, and also specify those to drop:

[3]:
# Map each raw sample label that is kept to the display name used downstream.
sample_rename = {
    # Vietnam cohort samples from Eguia study (collected 2010-2011); a "-b"
    # suffix on the raw label is kept as a trailing "b" on the age
    **{
        raw: raw.replace("age-", "age ").replace("-b", "b") + " (Vietnam)"
        for raw in [
            "age-30.5",
            "age-31.5",
            "age-33.5",
            "age-2.1",
            "age-2.2",
            "age-2.4",
            "age-2.5",
            "age-2.5-b",
            "age-3.3",
            "age-3.3-b",
            "age-3.4",
            "age-3.5",
        ]
    },
    # previously published samples from Lee et al 2019; only keeping ones from
    # 2009-2010 timeframe
    **{
        f"{year}-age-{age}": f"age {age} (Seattle)"
        for year, age in [(2010, 21), (2009, 53), (2009, 64), (2009, 65)]
    },
    # ferrets infected with Perth/2009
    **{f"ferret-Pitt-{n}-postinf": f"ferret {n} (Pitt)" for n in (1, 2, 3)},
    "ferret-WHO-Perth2009": "ferret (WHO)",
}

# Raw sample labels that are excluded, grouped by the reason for exclusion.
samples_to_drop = [
    # ignore antibodies, not relevant to this study
    *(
        f"antibody-{antibody}"
        for antibody in ("4F03", "1C04", "5A01", "3C04", "3C06", "4C01")
    ),
    # collected too late to be comparable to Vietnam samples
    *(
        f"2015-age-{age}-{timepoint}"
        for age in (25, 29, 48, 49)
        for timepoint in ("prevacc", "vacc")
    ),
    # antibody spike-in samples, not relevant to this study
    *(f"2009-age-65-with-{level}-4F03" for level in ("low", "mid", "hi")),
    # duplicate sample from an individual, don't include repeats
    "2009-age-53-plus-2-months",
    # pre-infection ferrets aren't relevant
    *(f"ferret-Pitt-{n}-preinf" for n in (1, 2, 3)),
    # ferret infected with a different strain
    "ferret-WHO-Victoria2011",
]

Convert the data to the format produced by `polyclonal`. Each serum is treated as having a single epitope, so we set `epitope` to 1 for every row; the escape value for wildtype residues is set to zero:

[4]:
# Every sample label in the data must be either renamed or explicitly dropped.
# On failure, report which labels are unaccounted for on each side.
observed_samples = set(avg_sel_tidy["serum_name_formatted"])
expected_samples = set(sample_rename).union(samples_to_drop)
assert observed_samples == expected_samples, (
    "samples in data but in neither `sample_rename` nor `samples_to_drop`: "
    f"{sorted(observed_samples - expected_samples, key=str)}; "
    "samples listed but absent from data: "
    f"{sorted(expected_samples - observed_samples, key=str)}"
)

avg_sel = (
    avg_sel_tidy
    # `@samples_to_drop` refers to the Python list of labels to exclude
    .query("serum_name_formatted not in @samples_to_drop")
    .assign(
        name=lambda x: x["serum_name_formatted"].map(sample_rename),
        # single-epitope data: every row gets the same epitope label
        epitope=1,
        # keep `mutdiffsel` for real mutations; rows where the mutant equals
        # the wildtype residue get an escape of 0
        escape=lambda x: x["mutdiffsel"].where(x["wildtype"] != x["mutation"], 0),
    )
    .rename(columns={"isite": "site_sequential", "mutation": "mutant"})
    [[
        "name",
        "serum",
        "serum_group",
        "epitope",
        "site",
        "site_sequential",
        "wildtype",
        "mutant",
        "escape",
    ]]
)

# After filtering, exactly the renamed samples should remain (a NaN here would
# mean a kept label had no entry in `sample_rename`).
final_names = set(avg_sel["name"])
expected_names = set(sample_rename.values())
assert final_names == expected_names, (
    "sample names in output do not match `sample_rename` values; differing names: "
    f"{sorted(final_names ^ expected_names, key=str)}"
)

output_csv = "results/perth2009/merged_escape.csv"
print(f"Writing to {output_csv}")
# the row index is not written; floats are written with 4 significant digits
avg_sel.to_csv(output_csv, index=False, float_format="%.4g")

avg_sel
Writing to results/perth2009/merged_escape.csv
[4]:
name serum serum_group epitope site site_sequential wildtype mutant escape
0 age 30.5 (Vietnam) HC150108 adult 1 159 174 F G 3.48230
1 age 30.5 (Vietnam) HC150108 adult 1 159 174 F N 2.45110
2 age 30.5 (Vietnam) HC150108 adult 1 159 174 F H 2.33410
3 age 30.5 (Vietnam) HC150108 adult 1 159 174 F T 1.51360
4 age 30.5 (Vietnam) HC150108 adult 1 159 174 F S 1.19540
... ... ... ... ... ... ... ... ... ...
430155 ferret (WHO) WHOCCPerth ferret 1 (HA2)117 461 K T -0.50720
430156 ferret (WHO) WHOCCPerth ferret 1 (HA2)117 461 K Q -0.52437
430157 ferret (WHO) WHOCCPerth ferret 1 (HA2)117 461 K A -0.63954
430158 ferret (WHO) WHOCCPerth ferret 1 (HA2)117 461 K S -1.05150
430159 ferret (WHO) WHOCCPerth ferret 1 (HA2)117 461 K K 0.00000

226400 rows × 9 columns

Make plots for all sera:

[5]:
# Plot settings shared by every serum: plot the `escape` statistic, categorized
# by `epitope`, with `site_sequential` added to the tooltips.
shared_plot_kwargs = dict(
    stat_col="escape",
    category_col="epitope",
    addtl_tooltip_stats=["site_sequential"],
    init_floor_at_zero=True,
)

# One chart per serum, in order of first appearance in `avg_sel`
# (`sort=False`), titled with the serum's display name.
for serum_name, serum_df in avg_sel.groupby("name", sort=False):
    serum_chart = polyclonal.plot.lineplot_and_heatmap(
        data_df=serum_df,
        **shared_plot_kwargs,
    )
    display(serum_chart.properties(title=serum_name))
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
[ ]: