Visualize distribution of amino-acid variants
In [1]:
# Imports
import os
import pandas as pd
import altair as alt
# Color palette for plots, given as hex codes.
# The order of the entries has been re-arranged for plotting.
tol_muted_adjusted = [
    "#AA4499", "#88CCEE", "#EE7733", "#44AA99",
    "#1f78b4", "#CC6677", "#117733", "#999933",
    "#DDCC77", "#CC3311", "#882255", "#000000",
    "#DDDDDD",
]

# Turn off Altair's max-rows check so that larger data frames can be plotted.
# The return value is bound to `_` so that it is not echoed as cell output.
_ = alt.data_transformers.disable_max_rows()
In [2]:
# This cell is tagged as `parameters` for papermill parameterization.
# The `None` values below are placeholders only; concrete values are
# assigned in a later cell before any of these names is used.

# Path of the variant CSV file that is loaded with `pd.read_csv`.
variant_data = None
# NOTE(review): presumably the directory that output files are written to;
# it is not used in the visible part of this notebook — confirm.
out_dir = None
# NOTE(review): presumably the path of an output CSV file;
# it is not used in the visible part of this notebook — confirm.
out_file = None
In [3]:
# Parameters
# Concrete values for this run; they replace the `None` placeholders
# assigned in the cell above.
variant_data = "results/variants/codon_variants.csv"  # CSV read by `pd.read_csv`
out_file = "results/summary_of_libraries/present_and_absent_mutations_in_DMS_libraries.csv"
out_dir = "results/summary_of_libraries/"
In [4]:
# Uncomment the lines below to run this notebook interactively
# variant_data = "../results/variants/codon_variants.csv"
# out_dir = "../results/summary_of_libraries/"
# out_file = "../results/summary_of_libraries/present_and_absent_mutations_in_DMS_libraries.csv"
In [5]:
# Load the variant table. The code below relies on its `n_aa_substitutions`
# and `library` columns.
variant_df = pd.read_csv(variant_data)

# Largest amino-acid mutation count that gets its own bar; every variant with
# this many or more substitutions is pooled into that last bar.
aa_mut_cap = 8

# Group all variants with >= `aa_mut_cap` amino-acid mutations.
# `Series.clip(upper=...)` is the vectorized equivalent of applying
# `lambda x: 8 if x >= 8 else x` element by element.
# NOTE: this overwrites the column in `variant_df` itself, so exact counts
# above the cap are no longer available from this frame afterwards.
variant_df["n_aa_substitutions"] = variant_df["n_aa_substitutions"].clip(
    upper=aa_mut_cap
)

# Axis styling shared by the x and y axes, defined once so that the two
# axes cannot drift apart.
shared_axis_style = dict(
    domainWidth=1,
    domainColor="black",
    tickColor="black",
    labelFontSize=8,
    labelFontWeight="normal",
    titleFontWeight="normal",
)

# Bar chart of the number of variants per (capped) amino-acid mutation count,
# drawn as one small panel per library, two panels per row.
distribution_plot = (
    alt.Chart(variant_df)
    .mark_bar(color="#000000", size=7)
    .encode(
        x=alt.X(
            "n_aa_substitutions",
            axis=alt.Axis(
                title="AA muts",
                # One tick per bin: 0, 1, ..., aa_mut_cap. The last tick
                # stands for ">= aa_mut_cap" because of the clipping above.
                values=list(range(aa_mut_cap + 1)),
                **shared_axis_style,
            ),
        ),
        y=alt.Y(
            "count()",
            axis=alt.Axis(
                title="number of variants",
                values=[0, 5000, 10000, 15000],
                **shared_axis_style,
            ),
            # The y-range is fixed so that all panels share the same scale.
            # NOTE(review): the upper limit of 15000 is hard-coded — confirm
            # that no bin in any library exceeds it.
            scale=alt.Scale(domain=[0, 15000]),
        ),
        facet=alt.Facet(
            "library",
            title=None,
            columns=2,
            header=alt.Header(
                labelFontSize=8,
                labelFontWeight="bold",
            ),
        ),
    )
    .properties(
        width=75,
        height=100,
    )
    .configure_axis(
        grid=False,
        labelFontSize=8,
        titleFontSize=8,
        labelFontWeight="normal",
    )
)

# Last expression of the cell: renders the chart as the cell output.
distribution_plot
Out[5]: