Average mutation functional effects for a condition
Import Python modules.
We use polyclonal for the plotting:
import itertools
import math
import altair as alt
import dms_variants.utils
import pandas as pd
import polyclonal
import polyclonal.plot
This notebook is parameterized by papermill.
The next cell is tagged as parameters so that papermill can inject the passed parameter values.
# this cell is tagged parameters for `papermill` parameterization
site_numbering_map_csv = None
mutation_annotations_csv = None
func_effects_csv = None
func_effects_singlemut_csv = None
latent_effects_csv = None
latent_html = None
functional_html = None
functional_singlemut_html = None
params = None
# Parameters
params = {
"plot_latent": True,
"avg_method": "median",
"per_selection_tooltips": True,
"floor_for_effect_std": -3,
"plot_kwargs": {
"addtl_slider_stats": {
"times_seen": 2,
"effect_std": 2,
"nt changes to codon": 3,
"n_selections": 1,
},
"addtl_slider_stats_as_max": ["effect_std", "nt changes to codon"],
"addtl_slider_stats_hide_not_filter": ["nt changes to codon"],
"heatmap_max_at_least": 1,
"heatmap_min_at_least": -1,
"init_floor_at_zero": False,
"init_site_statistic": "mean",
"site_zoom_bar_color_col": "region",
"slider_binding_range_kwargs": {
"times_seen": {"step": 1, "min": 1, "max": 25},
"n_selections": {"step": 1},
"nt changes to codon": {"step": 1, "min": 1, "max": 3},
},
},
"selections": {
"LibA-220210-293T_ACE2-1": [[1, 539]],
"LibA-220210-293T_ACE2-2": [[1, 539]],
"LibA-220302-293T_ACE2-1": [[1, 539]],
"LibA-220302-293T_ACE2-2": [[1, 539]],
"LibB-220302-293T_ACE2-1": [540, 541, [542, 1251]],
},
}
mutation_annotations_csv = "data/mutation_annotations.csv"
site_numbering_map_csv = "data/site_numbering_map.csv"
func_effects_csv = (
"results/func_effects/averages/293T_ACE2_entry_by_region_func_effects.csv"
)
func_effects_singlemut_csv = (
"results/func_effects/averages/293T_ACE2_entry_by_region_func_effects_singlemut.csv"
)
latent_effects_csv = (
"results/func_effects/averages/293T_ACE2_entry_by_region_latent_effects.csv"
)
functional_html = (
"results/func_effects/averages/293T_ACE2_entry_by_region_func_effects.html"
)
functional_singlemut_html = "results/func_effects/averages/293T_ACE2_entry_by_region_func_effects_singlemut.html"
latent_html = (
"results/func_effects/averages/293T_ACE2_entry_by_region_latent_effects.html"
)
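The cells above show the injected values. For reference, here is a minimal sketch (hypothetical notebook and output paths; the real pipeline supplies its own) of how papermill executes a notebook like this one and injects the parameters:
import papermill as pm

# hypothetical paths for illustration only
pm.execute_notebook(
    "avg_func_effects.ipynb",  # source notebook containing the `parameters`-tagged cell
    "results/notebooks/avg_func_effects_executed.ipynb",  # executed copy with injected values
    parameters={
        "site_numbering_map_csv": "data/site_numbering_map.csv",
        "func_effects_csv": "results/func_effects/averages/example_func_effects.csv",
        "params": {"avg_method": "median", "plot_latent": True},
    },
)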
Read the input data:
site_numbering_map = pd.read_csv(site_numbering_map_csv).rename(
columns={"reference_site": "site"}
)
assert site_numbering_map[["site", "sequential_site"]].notnull().all().all()
addtl_site_cols = [
c for c in site_numbering_map.columns if c != "site" and c.endswith("site")
]
sequential_to_site = site_numbering_map.set_index("sequential_site")["site"].to_dict()
assert len(sequential_to_site) == len(site_numbering_map)
if mutation_annotations_csv:
mutation_annotations = pd.read_csv(mutation_annotations_csv)
selections = params["selections"]
# get sites to keep for each selection (relevant if keeping only regions)
if isinstance(selections, list):
selection_sites = {
selection: set(sequential_to_site.values()) for selection in selections
}
else:
assert isinstance(selections, dict)
selection_sites = {}
for selection, region in selections.items():
region_sequential = []
for r in region:
if isinstance(r, int):
region_sequential.append(r)
else:
assert isinstance(r, list) and all(isinstance(ri, int) for ri in r), r
assert r[0] <= r[1], r
region_sequential += list(range(r[0], r[1] + 1))
selection_sites[selection] = [sequential_to_site[r] for r in region_sequential]
# get number of selections each site is kept in
n_selections_per_site = {
site: sum(site in selection_sites[selection] for selection in selections)
for site in site_numbering_map["site"]
}
func_effects = pd.concat(
[
pd.read_csv(f"results/func_effects/by_selection/{s}_func_effects.csv")
.assign(selection=s)
.query("site in @sites_to_keep")
for s, sites_to_keep in selection_sites.items()
],
ignore_index=True,
).assign(
times_seen=lambda x: x["times_seen"].astype("Int64"),
times_seen_singlemut=lambda x: x["times_seen_singlemut"].astype("Int64"),
)
# effects just from single mutant variants
func_effects_singlemut = func_effects.drop(
columns=["latent_phenotype_effect", "functional_effect", "times_seen"]
).rename(
columns={
"functional_effect_singlemut": "functional_effect",
"times_seen_singlemut": "times_seen",
}
)
# effects from global epistasis fitting of all variants
func_effects = func_effects.drop(
columns=["functional_effect_singlemut", "times_seen_singlemut"]
)
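For orientation, here is a toy sketch (invented values, limited to the columns referenced in the code above) of a per-selection functional-effects table before the reshaping above; the selection column itself is added at read time:
# toy illustration only; real data come from the per-selection CSVs
example_func_effects = pd.DataFrame(
    {
        "site": [1, 1],
        "wildtype": ["M", "M"],
        "mutant": ["A", "C"],
        "latent_phenotype_effect": [-0.5, -2.1],
        "functional_effect": [-0.3, -1.8],
        "functional_effect_singlemut": [-0.4, pd.NA],
        "times_seen": [5, 2],
        "times_seen_singlemut": [1, pd.NA],
    }
)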
Correlations among selections
Compute the correlations across selections in the mutation effects, both for the functional scores and for the latent-phenotype effects. For the functional effects, we report correlations for estimates from all variants and for estimates from just single-mutant variants:
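As a toy illustration (invented numbers) of the correlation step: dms_variants.utils.tidy_to_corr pairs up samples that share labels and returns one row per pair of samples with their correlation, which the code below squares into r2:
# toy data: two selections sharing three mutations (invented values)
toy_tidy = pd.DataFrame(
    {
        "selection": ["s1"] * 3 + ["s2"] * 3,
        "mutation": ["M1A", "M1C", "M1G"] * 2,
        "effect": [-0.1, -2.0, -0.5, -0.2, -1.8, -0.6],
    }
)
toy_corr = dms_variants.utils.tidy_to_corr(
    df=toy_tidy, sample_col="selection", label_col="mutation", value_col="effect"
)
# toy_corr should have columns selection_1, selection_2, and correlation (Pearson by default)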
# we compute correlations at several times_seen cutoffs; get the initial cutoff from params
try:
init_times_seen = params["plot_kwargs"]["addtl_slider_stats"]["times_seen"]
except KeyError:
print("No times seen in params, using a value of 3")
init_times_seen = 3
func_effects_tidy = func_effects.assign(
mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"]
).melt(
id_vars=["selection", "mutation", "times_seen"],
value_vars=["latent_phenotype_effect", "functional_effect"],
var_name="phenotype",
value_name="effect",
)
func_effects_tidy = pd.concat(
[
func_effects_tidy,
(
func_effects_singlemut.assign(
mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"],
phenotype="functional_effect_singlemut",
)
.rename(columns={"functional_effect": "effect"})
.drop(columns=["wildtype", "site", "mutant"])
),
],
).reset_index(drop=True)
# do analysis for each "times_seen"
func_effects_tidy = pd.concat(
[
func_effects_tidy.query("times_seen >= @t").assign(min_times_seen=t)
for t in [1, init_times_seen, 2 * init_times_seen]
]
)
corrs = (
dms_variants.utils.tidy_to_corr(
df=func_effects_tidy,
sample_col="selection",
label_col="mutation",
value_col="effect",
group_cols=["phenotype", "min_times_seen"],
)
.assign(r2=lambda x: x["correlation"] ** 2)
.drop(columns="correlation")
.assign(
min_times_seen=lambda x: "min times seen " + x["min_times_seen"].astype(str)
)
)
for phenotype, phenotype_corr in corrs.groupby("phenotype"):
corr_chart = (
alt.Chart(phenotype_corr)
.encode(
alt.X("selection_1", title=None),
alt.Y("selection_2", title=None),
column=alt.Column("min_times_seen", title=None),
color=alt.Color("r2", scale=alt.Scale(zero=True)),
tooltip=[
alt.Tooltip(c, format=".3g") if c == "r2" else c
for c in ["phenotype", "selection_1", "selection_2", "r2"]
],
)
.mark_rect(stroke="black")
.properties(width=alt.Step(15), height=alt.Step(15), title=phenotype)
.configure_axis(labelLimit=500)
)
display(corr_chart)
Make scatter plots of the mutation-effect correlations, with separate scatters for effects estimated from all variants and from just single-mutant variants:
print(f"Correlation scatter plots for times_seen filter of {init_times_seen}\n")
mutation_selection = alt.selection_point(
fields=["mutation"],
on="mouseover",
empty=False,
)
for effect_df, title in [
(func_effects, "Mutation effects from all variants"),
(func_effects_singlemut, "Mutation effects from just single-mutant variants"),
]:
corr_panels = []
    scatter_selections = sorted(effect_df["selection"].unique())
df = (
effect_df.assign(
mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"]
)
.query("times_seen >= @init_times_seen")
.pivot_table(index="mutation", columns="selection", values="functional_effect")
.reset_index()
)
chart_base = alt.Chart(df).add_params(mutation_selection)
    for sel1, sel2 in itertools.combinations(scatter_selections, 2):
corr_df = df[[sel1, sel2]].dropna(axis=0)
n = len(corr_df)
r = corr_df.corr().values[1, 0]
print(f"{n=} shared mutations for {sel1=} vs {sel2=}")
if n == 0:
continue
corr_panels.append(
chart_base.encode(
alt.X(sel1, scale=alt.Scale(nice=False, padding=4)),
alt.Y(sel2, scale=alt.Scale(nice=False, padding=4)),
size=alt.condition(mutation_selection, alt.value(80), alt.value(30)),
color=alt.condition(
mutation_selection, alt.value("red"), alt.value("black")
),
opacity=alt.condition(
mutation_selection, alt.value(1), alt.value(0.25)
),
tooltip=[
"mutation",
alt.Tooltip(sel1, format=".3f"),
alt.Tooltip(sel2, format=".3f"),
],
)
.mark_circle()
.properties(
width=160,
height=160,
title=alt.TitleParams(
f"R = {r:.2f}, N = {n}", fontSize=11, fontWeight="normal", dy=2
),
)
)
ncols = 4
corr_rows = []
for irow in range(int(math.ceil(len(corr_panels) / ncols))):
corr_rows.append(
alt.hconcat(
*[
corr_panels[irow * ncols + icol]
for icol in range(min(ncols, len(corr_panels[irow * ncols :])))
]
)
)
display(alt.vconcat(*corr_rows).configure_axis(grid=False).properties(title=title))
Correlation scatter plots for times_seen filter of 2

n=1425 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220210-293T_ACE2-2'
n=1460 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-1'
n=1425 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibB-220302-293T_ACE2-1'
n=1425 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibA-220302-293T_ACE2-1'
n=1489 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibB-220302-293T_ACE2-1'
n=1425 shared mutations for sel1='LibA-220302-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220302-293T_ACE2-1' vs sel2='LibB-220302-293T_ACE2-1'
n=0 shared mutations for sel1='LibA-220302-293T_ACE2-2' vs sel2='LibB-220302-293T_ACE2-1'
n=60 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220210-293T_ACE2-2'
n=61 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-1'
n=60 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220210-293T_ACE2-1' vs sel2='LibB-220302-293T_ACE2-1'
n=60 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibA-220302-293T_ACE2-1'
n=62 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220210-293T_ACE2-2' vs sel2='LibB-220302-293T_ACE2-1'
n=60 shared mutations for sel1='LibA-220302-293T_ACE2-1' vs sel2='LibA-220302-293T_ACE2-2'
n=0 shared mutations for sel1='LibA-220302-293T_ACE2-1' vs sel2='LibB-220302-293T_ACE2-1'
n=0 shared mutations for sel1='LibA-220302-293T_ACE2-2' vs sel2='LibB-220302-293T_ACE2-1'
Average functional effects
We average the functional effects across selections. The resulting file has the average functional effect for each mutation (the mean or median across selections, per avg_method), the mean times seen across selections, the number of selections in which each mutation was seen, and the per-selection effect (times seen). We do this for effects estimated from all variants, for effects estimated from just single-mutant variants, and for latent-phenotype effects.
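As a toy illustration (invented numbers) of how the two avg_method options can differ when one selection is an outlier:
# toy example: one mutation measured in three selections (invented values)
toy_effects = pd.Series([-0.2, -0.3, -2.5])
print(toy_effects.mean())    # about -1.0; pulled toward the outlier selection
print(toy_effects.median())  # -0.3; robust to the outlier selection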
avg_method = params["avg_method"]
assert avg_method in {"mean", "median"}, avg_method
func_effects_by_phenotype = func_effects.melt(
id_vars=["site", "wildtype", "mutant", "times_seen", "selection"],
value_vars=["functional_effect", "latent_phenotype_effect"],
var_name="phenotype",
value_name="effect",
)
func_effects_by_phenotype = pd.concat(
[
func_effects_by_phenotype,
func_effects_singlemut.assign(phenotype="functional_effect_singlemut").rename(
columns={"functional_effect": "effect"}
),
]
).reset_index(drop=True)
if "floor_for_effect_std" in params:
floor_for_effect_std = params["floor_for_effect_std"]
print(f"For computing effect std, first floor at {floor_for_effect_std=}")
else:
floor_for_effect_std = None
func_effects_by_phenotype["effect_floored"] = func_effects_by_phenotype["effect"].clip(
lower=floor_for_effect_std
)
avg_func_effects = (
func_effects_by_phenotype.query("effect.notnull()")
.groupby(["phenotype", "site", "wildtype", "mutant"], as_index=False)
.aggregate(
effect=pd.NamedAgg("effect", avg_method),
effect_std=pd.NamedAgg("effect_floored", lambda s: s.std(ddof=0)),
times_seen=pd.NamedAgg("times_seen", "sum"),
n_selections=pd.NamedAgg("site", "count"),
)
.assign(
times_seen=lambda x: (
x["times_seen"] / x["site"].map(n_selections_per_site)
).where(
x["mutant"] != x["wildtype"],
pd.NA,
)
)
)
assert not set(selections).intersection(avg_func_effects.columns)
# add per-selection effects (times_seen)
avg_func_effects = avg_func_effects.merge(
(
func_effects_by_phenotype.assign(
effect_times_seen=lambda x: (
x["effect"].map(lambda e: f"{e:.2f}")
+ (" (" + x["times_seen"].astype(str) + ")").where(
x["mutant"] != x["wildtype"],
"",
)
)
)
.pivot_table(
index=[
"site",
"wildtype",
"mutant",
"phenotype",
],
values="effect_times_seen",
columns="selection",
aggfunc=lambda s: ",".join(s),
)
.reset_index()
),
on=["phenotype", "site", "wildtype", "mutant"],
validate="one_to_one",
)
for phenotype, csv_file in [
("functional_effect", func_effects_csv),
("functional_effect_singlemut", func_effects_singlemut_csv),
("latent_phenotype_effect", latent_effects_csv),
]:
print(f"Writing {phenotype} to {csv_file}")
(
avg_func_effects.query("phenotype == @phenotype")[
[
"site",
"wildtype",
"mutant",
"effect",
"effect_std",
"times_seen",
"n_selections",
]
].to_csv(csv_file, index=False, float_format="%.4g")
)
For computing effect std, first floor at floor_for_effect_std=-3
Writing functional_effect to results/func_effects/averages/293T_ACE2_entry_by_region_func_effects.csv
Writing functional_effect_singlemut to results/func_effects/averages/293T_ACE2_entry_by_region_func_effects_singlemut.csv
Writing latent_phenotype_effect to results/func_effects/averages/293T_ACE2_entry_by_region_latent_effects.csv
Correlation of avg func effects estimated from all variants and from just single-mutant variants
Plot this correlation as a scatter plot, noting how many mutations with each type of estimate pass the times-seen filter:
all_single_scatter_df = (
avg_func_effects.query("phenotype == 'functional_effect'")[
["site", "wildtype", "mutant", "effect", "times_seen"]
]
.rename(columns={"effect": "effect_all", "times_seen": "times_seen_all"})
.merge(
(
avg_func_effects.query("phenotype == 'functional_effect_singlemut'")[
["site", "wildtype", "mutant", "effect", "times_seen"]
].rename(
columns={
"effect": "effect_singlemut",
"times_seen": "times_seen_singlemut",
}
)
),
how="outer",
validate="one_to_one",
on=["site", "wildtype", "mutant"],
)
.assign(
times_seen=lambda x: x[["times_seen_all", "times_seen_singlemut"]].min(axis=1),
mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"],
)
.drop(columns=["site", "wildtype", "mutant"])
)
assert len(all_single_scatter_df) == len(
avg_func_effects.query("phenotype == 'functional_effect'")
)
times_seen_scatter_slider = alt.param(
value=init_times_seen,
bind=alt.binding_range(
name="minimum times_seen",
min=0,
max=avg_func_effects["times_seen"].quantile(0.9),
),
)
all_single_base = alt.Chart(all_single_scatter_df).add_params(times_seen_scatter_slider)
all_single_scatter = (
all_single_base.transform_filter(
alt.datum["times_seen"] >= times_seen_scatter_slider
)
.encode(
alt.X("effect_all", scale=alt.Scale(nice=False, padding=4)),
alt.Y("effect_singlemut", scale=alt.Scale(nice=False, padding=4)),
tooltip=[
"mutation",
alt.Tooltip("effect_all", format=".3f"),
alt.Tooltip("effect_singlemut", format=".3f"),
alt.Tooltip("times_seen_all", format=".1f"),
alt.Tooltip("times_seen_singlemut", format=".1f"),
],
)
.mark_circle(color="black", size=30, opacity=0.25)
.properties(width=250, height=250, title="all-variant vs single-mutant effects")
)
all_single_corr = (
all_single_scatter.transform_regression(
"effect_all", "effect_singlemut", params=True
)
.transform_calculate(
r=alt.expr.if_(
alt.datum["coef"][1] > 0,
alt.expr.sqrt(alt.datum["rSquared"]),
-alt.expr.sqrt(alt.datum["rSquared"]),
),
r_text="r = " + alt.expr.format(alt.datum["r"], ".2f"),
)
.encode(
text="r_text:N",
x=alt.value(5),
y=alt.value(10),
)
.mark_text(size=14, align="left", color="blue")
)
n_charts = [
(
all_single_base.transform_calculate(
exceeds=alt.expr.if_(
(
alt.expr.isValid(alt.datum[f"times_seen_{vartype}"])
& (alt.datum[f"times_seen_{vartype}"] > times_seen_scatter_slider)
),
1,
0,
)
)
.transform_aggregate(n="sum(exceeds)")
.transform_calculate(
n_text=f"N_{vartype} = " + alt.expr.format(alt.datum.n, "d")
)
.encode(
text="n_text:N",
x=alt.value(5),
y=alt.value(10 + (i + 1) * 15),
)
.mark_text(size=14, align="left", color="blue")
)
for i, vartype in enumerate(["all", "singlemut"])
]
all_single_scatter + all_single_corr + n_charts[0] + n_charts[1]
Make plots
Set up keyword arguments to polyclonal.plot.lineplot_and_heatmap (https://jbloomlab.github.io/polyclonal/polyclonal.plot.html#polyclonal.plot.lineplot_and_heatmap) if they are not already specified:
plot_kwargs = params["plot_kwargs"]
if "addtl_slider_stats" not in plot_kwargs:
plot_kwargs["addtl_slider_stats"] = {}
if "times_seen" not in plot_kwargs["addtl_slider_stats"]:
plot_kwargs["addtl_slider_stats"]["times_seen"] = 3
if "effect_std" not in plot_kwargs["addtl_slider_stats"]:
plot_kwargs["addtl_slider_stats"]["effect_std"] = avg_func_effects[
"effect_std"
].max()
if "addtl_slider_stats_as_max" not in plot_kwargs:
plot_kwargs["addtl_slider_stats_as_max"] = ["effect_std"]
else:
plot_kwargs["addtl_slider_stats_as_max"].append("effect_std")
elif "addtl_slider_stats_as_max" not in plot_kwargs:
raise ValueError(
"You specified `effect_std` in `addtl_slider_stats` but did not add it to "
"`addtl_slider_stats_as_max`. If you really do not want `effect_std` in "
"`addtl_slider_stats_as_max`, then specify that list without it."
)
if "n_selections" not in plot_kwargs["addtl_slider_stats"]:
plot_kwargs["addtl_slider_stats"]["n_selections"] = min(
max(n_selections_per_site.values()) // 2 + 1,
avg_func_effects["n_selections"].max(),
)
if "site_zoom_bar_color_col" in plot_kwargs:
if plot_kwargs["site_zoom_bar_color_col"] in avg_func_effects.columns:
pass
elif plot_kwargs["site_zoom_bar_color_col"] in site_numbering_map.columns:
avg_func_effects = avg_func_effects.merge(
site_numbering_map[["site", plot_kwargs["site_zoom_bar_color_col"]]],
on="site",
validate="many_to_one",
how="left",
)
if mutation_annotations_csv:
if not {"site", "mutant"}.issubset(mutation_annotations.columns):
raise ValueError(f"{mutation_annotations.columns=} lacks 'site', 'mutant'")
if set(mutation_annotations.columns).intersection(avg_func_effects.columns) != {
"site",
"mutant",
}:
raise ValueError(
f"{mutation_annotations.columns=} shares columns with {avg_func_effects.columns=}"
)
avg_func_effects = avg_func_effects.merge(
mutation_annotations,
on=["site", "mutant"],
how="left",
validate="many_to_one",
)
for col in mutation_annotations.columns:
if col not in {"site", "mutant"}:
avg_func_effects[col] = avg_func_effects[col].where(
avg_func_effects["wildtype"] != avg_func_effects["mutant"], pd.NA
)
if "addtl_tooltip_stats" not in plot_kwargs:
plot_kwargs["addtl_tooltip_stats"] = []
for c in ["effect_std"] + addtl_site_cols:
if c not in plot_kwargs["addtl_tooltip_stats"]:
plot_kwargs["addtl_tooltip_stats"].append(c)
if "sequential_site" not in avg_func_effects.columns:
avg_func_effects = avg_func_effects.merge(
site_numbering_map[["site", *addtl_site_cols]],
on="site",
validate="many_to_one",
how="left",
)
if any(avg_func_effects["site"] != avg_func_effects["sequential_site"]):
if "sequential_site" not in plot_kwargs["addtl_tooltip_stats"]:
plot_kwargs["addtl_tooltip_stats"].append("sequential_site")
if params["per_selection_tooltips"]:
assert set(selections).issubset(avg_func_effects.columns)
plot_kwargs["addtl_tooltip_stats"] += [
s for s in selections if s not in plot_kwargs["addtl_tooltip_stats"]
]
if "alphabet" not in plot_kwargs:
plot_kwargs["alphabet"] = [
a
for a in polyclonal.alphabets.biochem_order_aas(polyclonal.AAS_WITHSTOP_WITHGAP)
if a in set(avg_func_effects["mutant"])
]
if "sites" not in plot_kwargs:
plot_kwargs["sites"] = site_numbering_map.sort_values("sequential_site")[
"site"
].tolist()
Do we plot latent-phenotype effects?
plot_latent = params["plot_latent"] if "plot_latent" in params else False
print(f"{plot_latent=}")
plot_latent=True
First, plot the standard deviation of the functional-effect measurements across selections versus the effect itself. This is useful to examine if you impose a filter on the standard deviation of the functional effects to remove mutations whose measurements vary widely across selections.
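The effect_std shown here is computed after flooring effects at floor_for_effect_std (when that parameter is set). As a toy illustration (invented numbers, assuming a floor of -3) of why flooring helps: two selections that both call a mutation highly deleterious should not look discordant just because their scores saturate at different large negative values:
# toy example of flooring before taking the standard deviation; values invented
toy_std = pd.Series([-4.0, -9.0])            # both selections: highly deleterious
print(toy_std.std(ddof=0))                   # 2.5; looks discordant without flooring
print(toy_std.clip(lower=-3).std(ddof=0))    # 0.0 after flooring at -3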
times_seen_slider = alt.param(
value=plot_kwargs["addtl_slider_stats"]["times_seen"],
bind=alt.binding_range(
name="minimum times_seen",
min=0,
max=avg_func_effects["times_seen"].quantile(0.9),
),
)
effect_std_slider = alt.param(
value=plot_kwargs["addtl_slider_stats"]["effect_std"],
bind=alt.binding_range(
name="maximum effect_std",
min=0,
max=avg_func_effects["effect_std"].max(),
),
)
std_df = avg_func_effects.assign(
mutation=lambda x: x["wildtype"] + x["site"].astype(str) + x["mutant"]
)[["phenotype", "mutation", "effect", "effect_std", "times_seen", *selections]].rename(
columns={c: c.replace(".", "_") for c in avg_func_effects.columns}
)
if not plot_latent:
std_df = std_df[std_df["phenotype"] != "latent_phenotype_effect"]
std_chart = (
alt.Chart(std_df)
.add_params(effect_std_slider, times_seen_slider, mutation_selection)
.transform_filter(alt.datum["times_seen"] >= times_seen_slider)
.transform_calculate(
above_max_effect_std=alt.datum["effect_std"] > effect_std_slider
)
.encode(
alt.X("effect_std", title=f"effect_std (with {floor_for_effect_std=})"),
alt.Y("effect"),
alt.Color(
"above_max_effect_std:N",
scale=alt.Scale(domain=[False, True]),
legend=alt.Legend(orient="bottom", symbolOpacity=1),
),
size=alt.condition(mutation_selection, alt.value(80), alt.value(30)),
strokeWidth=alt.condition(mutation_selection, alt.value(3), alt.value(0.5)),
opacity=alt.condition(mutation_selection, alt.value(1), alt.value(0.2)),
tooltip=[
alt.Tooltip(c, format=".3g") if std_df[c].dtype == float else c
for c in std_df.columns
],
column="phenotype",
)
.mark_circle(stroke="black")
.resolve_scale(x="independent", y="independent")
.properties(width=250, height=250)
.configure_axis(grid=False)
)
std_chart
Now make the plots of mutation effects: functional effects estimated from all variants, functional effects estimated from just single-mutant variants, and (if plot_latent is set) latent-phenotype effects:
plots_to_make = [
("functional_effect", functional_html),
("functional_effect_singlemut", functional_singlemut_html),
]
if plot_latent:
plots_to_make.append(("latent_phenotype_effect", latent_html))
for phenotype, plotfile in plots_to_make:
print(f"\n\nPlotting {phenotype} and saving to {plotfile}")
df = avg_func_effects.query("phenotype == @phenotype")
chart = polyclonal.plot.lineplot_and_heatmap(
data_df=df,
stat_col="effect",
category_col="phenotype",
**plot_kwargs,
)
chart.save(plotfile)
display(chart)
Plotting functional_effect and saving to results/func_effects/averages/293T_ACE2_entry_by_region_func_effects.html
Plotting functional_effect_singlemut and saving to results/func_effects/averages/293T_ACE2_entry_by_region_func_effects_singlemut.html
Plotting latent_phenotype_effect and saving to results/func_effects/averages/293T_ACE2_entry_by_region_latent_effects.html