Process data from Perth/2009 HA
Some of these data are taken from a previously published study (Lee et al. (2019)); the rest are unpublished data from Rachel Eguia and Juhye Lee, available in this repository (https://github.com/jbloomlab/map_flu_serum_Vietnam_H3_Perth2009).
The Eguia samples were all collected in Vietnam in 2010-2011.
Import Python modules:
[1]:
import altair as alt
import pandas as pd
import polyclonal.plot
Read the data:
[2]:
# Load the per-dataset selection tables and stack them into one frame,
# tagging every row with the dataset it came from.
avg_sel_tidy = pd.concat(
    [
        pd.read_csv(path, low_memory=False).assign(dataset=label)
        for path, label in (
            ("results/perth2009/eguia_avg_sel_tidy.csv", "Eguia"),
            ("results/perth2009/lee_avg_sel_tidy.csv", "Lee2019"),
        )
    ],
    ignore_index=True,
)

# Sanity check: the number of distinct formatted serum names, the number of
# distinct serum identifiers, and the number of (serum, dataset) groups must
# all agree.
assert (
    avg_sel_tidy["serum_name_formatted"].nunique()
    == avg_sel_tidy["serum"].nunique()
    == len(avg_sel_tidy.groupby(["serum", "dataset"]))
)
Make a dictionary to rename samples, and also specify those to drop:
[3]:
# Maps each retained `serum_name_formatted` value to the display name used
# downstream.
sample_rename = {
    # --- Eguia study: Vietnam cohort, collected 2010-2011 ---
    "age-30.5": "age 30.5 (Vietnam)",
    "age-31.5": "age 31.5 (Vietnam)",
    "age-33.5": "age 33.5 (Vietnam)",
    "age-2.1": "age 2.1 (Vietnam)",
    "age-2.2": "age 2.2 (Vietnam)",
    "age-2.4": "age 2.4 (Vietnam)",
    "age-2.5": "age 2.5 (Vietnam)",
    "age-2.5-b": "age 2.5b (Vietnam)",
    "age-3.3": "age 3.3 (Vietnam)",
    "age-3.3-b": "age 3.3b (Vietnam)",
    "age-3.4": "age 3.4 (Vietnam)",
    "age-3.5": "age 3.5 (Vietnam)",
    # --- Lee et al 2019 (published): only the 2009-2010 samples are kept ---
    "2010-age-21": "age 21 (Seattle)",
    "2009-age-53": "age 53 (Seattle)",
    "2009-age-64": "age 64 (Seattle)",
    "2009-age-65": "age 65 (Seattle)",
    # --- ferrets infected with Perth/2009 ---
    "ferret-Pitt-1-postinf": "ferret 1 (Pitt)",
    "ferret-Pitt-2-postinf": "ferret 2 (Pitt)",
    "ferret-Pitt-3-postinf": "ferret 3 (Pitt)",
    "ferret-WHO-Perth2009": "ferret (WHO)",
}

# `serum_name_formatted` values that are excluded, grouped by the reason
# for exclusion.
samples_to_drop = [
    # monoclonal antibodies: not relevant to this study
    "antibody-4F03",
    "antibody-1C04",
    "antibody-5A01",
    "antibody-3C04",
    "antibody-3C06",
    "antibody-4C01",
    # 2015 samples: collected too late to compare with the Vietnam samples
    "2015-age-25-prevacc",
    "2015-age-25-vacc",
    "2015-age-29-prevacc",
    "2015-age-29-vacc",
    "2015-age-48-prevacc",
    "2015-age-48-vacc",
    "2015-age-49-prevacc",
    "2015-age-49-vacc",
    # serum with antibody spiked in: not relevant to this study
    "2009-age-65-with-low-4F03",
    "2009-age-65-with-mid-4F03",
    "2009-age-65-with-hi-4F03",
    # repeat sample from an individual who is already included
    "2009-age-53-plus-2-months",
    # pre-infection ferret sera: not relevant
    "ferret-Pitt-1-preinf",
    "ferret-Pitt-2-preinf",
    "ferret-Pitt-3-preinf",
    # ferret infected with a different strain
    "ferret-WHO-Victoria2011",
]
Make the data consistent with the format produced by polyclonal. All of these sera are treated as single-epitope, so we simply set epitope to 1 for every row:
[4]:
# Every serum present in the input must be either renamed or dropped, and
# neither list may name a serum that is absent from the input.
assert set(avg_sel_tidy["serum_name_formatted"]) == (
    set(sample_rename) | set(samples_to_drop)
)

avg_sel = (
    avg_sel_tidy.query("serum_name_formatted not in @samples_to_drop")
    .assign(
        name=lambda frame: frame["serum_name_formatted"].map(sample_rename),
        # all rows get the same epitope label
        epitope=1,
        # rows where the mutation equals the wildtype get escape 0; all
        # other rows keep their mutdiffsel value
        escape=lambda frame: frame["mutdiffsel"].mask(
            frame["wildtype"] == frame["mutation"], 0
        ),
    )
    .rename(columns={"isite": "site_sequential", "mutation": "mutant"})
    .loc[
        :,
        [
            "name",
            "serum",
            "serum_group",
            "epitope",
            "site",
            "site_sequential",
            "wildtype",
            "mutant",
            "escape",
        ],
    ]
)

# After filtering, exactly the renamed sera should remain.
assert set(avg_sel["name"]) == set(sample_rename.values())

output_csv = "results/perth2009/merged_escape.csv"
print(f"Writing to {output_csv}")
avg_sel.to_csv(output_csv, index=False, float_format="%.4g")

# Last expression of the cell: shows the merged table in the notebook.
avg_sel
Writing to results/perth2009/merged_escape.csv
[4]:
| name | serum | serum_group | epitope | site | site_sequential | wildtype | mutant | escape | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | age 30.5 (Vietnam) | HC150108 | adult | 1 | 159 | 174 | F | G | 3.48230 |
| 1 | age 30.5 (Vietnam) | HC150108 | adult | 1 | 159 | 174 | F | N | 2.45110 |
| 2 | age 30.5 (Vietnam) | HC150108 | adult | 1 | 159 | 174 | F | H | 2.33410 |
| 3 | age 30.5 (Vietnam) | HC150108 | adult | 1 | 159 | 174 | F | T | 1.51360 |
| 4 | age 30.5 (Vietnam) | HC150108 | adult | 1 | 159 | 174 | F | S | 1.19540 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 430155 | ferret (WHO) | WHOCCPerth | ferret | 1 | (HA2)117 | 461 | K | T | -0.50720 |
| 430156 | ferret (WHO) | WHOCCPerth | ferret | 1 | (HA2)117 | 461 | K | Q | -0.52437 |
| 430157 | ferret (WHO) | WHOCCPerth | ferret | 1 | (HA2)117 | 461 | K | A | -0.63954 |
| 430158 | ferret (WHO) | WHOCCPerth | ferret | 1 | (HA2)117 | 461 | K | S | -1.05150 |
| 430159 | ferret (WHO) | WHOCCPerth | ferret | 1 | (HA2)117 | 461 | K | K | 0.00000 |
226400 rows × 9 columns
Make plots for all sera:
[5]:
# Draw one line-plot-plus-heatmap chart per serum, in the order the sera
# first appear in `avg_sel` (sort=False keeps that order).
for serum_label, serum_df in avg_sel.groupby("name", sort=False):
    chart = polyclonal.plot.lineplot_and_heatmap(
        data_df=serum_df,
        stat_col="escape",
        category_col="epitope",
        addtl_tooltip_stats=["site_sequential"],
        init_floor_at_zero=True,
    )
    # Title each chart with the serum's display name.
    display(chart.properties(title=serum_label))
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
/fh/fast/bloom_j/computational_notebooks/fwelsh/flu_h3_hk19_dms/.snakemake/conda/240f53b6388b7c3f00c26c27af3893a9_/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for col_name, dtype in df.dtypes.iteritems():
[ ]: