# Imports
import os
import pandas as pd
import numpy as np
import scipy as sp
import altair as alt
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import ticker as mticker

# Hex color codes available to the plots below (the variable name suggests an
# adjusted Paul Tol "muted" palette -- not verified here)
tol_muted_adjusted = [
    "#000000", "#CC6677", "#1f78b4", "#DDCC77", "#117733",
    "#882255", "#88CCEE", "#44AA99", "#999933", "#AA4499",
    "#EE7733", "#CC3311", "#DDDDDD",
]

# Matplotlib rc overrides handed to seaborn: 300 dpi for on-screen and saved
# figures, and the "none" SVG font type
_rc_overrides = {
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "svg.fonttype": "none",
}
sns.set(rc=_rc_overrides)
sns.set_style("ticks")
# sns.set_palette(tol_muted_adjusted)

# Lift Altair's row limit so large data frames can be charted; the return value
# is bound to `_` so it is not echoed as cell output
_ = alt.data_transformers.disable_max_rows()

# this cell is tagged as `parameters` for papermill parameterization
# Every parameter defaults to None here and is given a real value in the
# "Parameters" section further down.

# Paths to the averaged functional-effect CSVs, one per cell line
HEK293T_data_path = None
humanDAG1_data_path = None
mastomysDAG1_data_path = None
# Directory holding one `<comparison>_shifts.csv` file per multiDMS comparison
shift_file_dir = None
# Minimum times seen filter
MTS = None 
# Compared against the per-cell-line `n_selections_*` columns and against the
# number of non-null comparisons when filtering below
n_selections = None

# Output directory and output files (interactive HTML chart and two SVG figures)
html_dir = None
html_output = None
multidms_shrinkage_plot = None
multidms_shift_profile = None

# Parameters
# NOTE(review): presumably the values injected by papermill for this run --
# confirm against the pipeline configuration.
HEK293T_data_path = "results/func_effects/averages/293T_entry_func_effects.csv"
humanDAG1_data_path = "results/func_effects/averages/human_293T_entry_func_effects.csv"
mastomysDAG1_data_path = (
    "results/func_effects/averages/mastomys_293T_entry_func_effects.csv"
)
MTS = 2
n_selections = 8
shift_file_dir = "results/func_effect_shifts/by_comparison/"
html_dir = "results/DAG1_ortholog_correlations/"
html_output = "results/DAG1_ortholog_correlations/DAG1_ortholog_correlations.html"
multidms_shrinkage_plot = "results/DAG1_ortholog_correlations/DAG1_shrinkage_plot.svg"
multidms_shift_profile = "results/DAG1_ortholog_correlations/multidms_shift_profile.svg"

# # Uncomment for running interactively
# HEK293T_data_path = "../results/func_effects/averages/293T_entry_func_effects.csv"
# humanDAG1_data_path = "../results/func_effects/averages/human_293T_entry_func_effects.csv"
# mastomysDAG1_data_path = "../results/func_effects/averages/mastomys_293T_entry_func_effects.csv"
# shift_file_dir = "../results/func_effect_shifts/by_comparison/"
# # Minimum times seen filter
# MTS = 2
# n_selections = 8

# html_dir = "../results/DAG1_ortholog_correlations/"
# html_output = "../results/DAG1_ortholog_correlations/DAG1_ortholog_correlations.html"
# multidms_shrinkage_plot = "../results/DAG1_ortholog_correlations/DAG1_shrinkage_plot.svg"
# multidms_shift_profile = "../results/DAG1_ortholog_correlations/multidms_shift_profile.svg"

# Read multiDMS analysis files into one df.
# One `<comparison>_shifts.csv` file is expected in `shift_file_dir` for each
# comparison named here.
comparisons = [
    "LibA-1",
    "LibA-2",
    "LibA-3",
    "LibA-4",
    "LibB-1",
    "LibB-2",
    "LibB-3",
    "LibB-4",
]

# Tag every frame with the comparison it came from and make sure the lasso
# weight is numeric. `os.path.join` avoids a doubled separator when
# `shift_file_dir` already ends with "/".
shifts = [
    pd.read_csv(os.path.join(shift_file_dir, f"{c}_shifts.csv")).assign(
        comparison=c,
        lasso_shift=lambda x: x["lasso_shift"].astype(float),
    )
    for c in comparisons
]

# Check all shift comparisons are comparable:
reference_shift_df = shifts[0]
for shift_df in shifts[1:]:
    # `Index.equals` also copes with a differing number of columns; an
    # elementwise `!=` between indexes of unequal length raises pandas' own
    # "Lengths must match" error before the message below could be shown.
    if not shift_df.columns.equals(reference_shift_df.columns):
        raise ValueError("comparisons do not all have the same columns")
    if set(shift_df["lasso_shift"]) != set(reference_shift_df["lasso_shift"]):
        raise ValueError("comparisons do not all have the same `lasso_shifts`")

# Stack all comparisons; the `comparison` column records each row's origin
shifts = pd.concat(shifts)

# Collapse the per-condition `times_seen_*` columns into one row-wise mean
times_seen_cols = [col for col in shifts.columns if col.startswith("times_seen_")]
shifts["times_seen"] = shifts[times_seen_cols].mean(axis=1)

# Long ("tidy") format: one row per mutation, comparison, lasso weight and
# condition, with the `shift_*` columns stacked into a single `shift` column
shift_cols = [col for col in shifts.columns if col.startswith("shift_")]
tidy_id_cols = [
    "comparison",
    "site",
    "wildtype",
    "mutant",
    "lasso_shift",
    "times_seen",
    "latent_phenotype_effect",
]
shifts_tidy = shifts.melt(
    id_vars=tidy_id_cols,
    value_vars=shift_cols,
    var_name="condition",
    value_name="shift",
)

# Replace times_seen and latent_phenotype_effect by their mean over all rows
# sharing (site, mutant, lasso_shift), so they are constant across comparisons
# and can serve as pivot index columns; then spread comparisons into columns
per_mutation_and_weight = shifts_tidy.groupby(["site", "mutant", "lasso_shift"])
pivot_index_cols = [
    "site",
    "wildtype",
    "mutant",
    "latent_phenotype_effect",
    "times_seen",
    "lasso_shift",
    "condition",
]
shifts_comparison_pivoted = (
    shifts_tidy.assign(
        times_seen=per_mutation_and_weight["times_seen"].transform("mean"),
        latent_phenotype_effect=per_mutation_and_weight[
            "latent_phenotype_effect"
        ].transform("mean"),
    )
    .pivot_table(
        index=pivot_index_cols,
        values="shift",
        columns="comparison",
    )
    .reset_index()
)

# Per-comparison columns created by the pivot, grouped by library
lib_a_cols = ["LibA-1", "LibA-2", "LibA-3", "LibA-4"]
lib_b_cols = ["LibB-1", "LibB-2", "LibB-3", "LibB-4"]
all_comparison_cols = lib_a_cols + lib_b_cols

# Row-wise median shift within each library and across all comparisons
shifts_comparison_pivoted["LibA_shift"] = shifts_comparison_pivoted[lib_a_cols].median(axis=1)
shifts_comparison_pivoted["LibB_shift"] = shifts_comparison_pivoted[lib_b_cols].median(axis=1)
shifts_comparison_pivoted["median_shift"] = (
    shifts_comparison_pivoted[all_comparison_cols].median(axis=1)
)

# Number of comparisons that actually have a (non-null) shift value
shifts_comparison_pivoted["n_comparisons"] = (
    shifts_comparison_pivoted[all_comparison_cols].notnull().sum(axis=1)
)

# Mutation string (wildtype + site + mutant) and mutation class, where a "*"
# mutant is labelled nonsense and everything else missense
shifts_comparison_pivoted["mutation"] = (
    shifts_comparison_pivoted["wildtype"]
    + shifts_comparison_pivoted["site"].astype(str)
    + shifts_comparison_pivoted["mutant"]
)
shifts_comparison_pivoted["mutation type"] = (
    shifts_comparison_pivoted["mutant"]
    .eq("*")
    .map({True: "nonsense", False: "missense"})
)

# Keep rows that pass the minimum times-seen filter and have a value in the
# required number of comparisons; the per-comparison columns are then dropped
seen_often_enough = shifts_comparison_pivoted["times_seen"] >= MTS
in_every_comparison = shifts_comparison_pivoted["n_comparisons"] == n_selections
shifts_comparison_pivoted = (
    shifts_comparison_pivoted[seen_often_enough & in_every_comparison]
    .drop(columns=all_comparison_cols)
    .copy()
)

# Re-shape so each library contributes its own rows: the library's median
# becomes the common "shift" column and a "library" label column is appended
libA_measurements = (
    shifts_comparison_pivoted
    .drop(columns=["LibB_shift"])
    .rename(columns={"LibA_shift": "shift"})
    .assign(library="library A")
)
libB_measurements = (
    shifts_comparison_pivoted
    .drop(columns=["LibA_shift"])
    .rename(columns={"LibB_shift": "shift"})
    .assign(library="library B")
)

# Library A rows first, then library B, renumbered from zero
shifts_comparison_pivoted = pd.concat(
    [libA_measurements, libB_measurements],
    ignore_index=True,
)

# Sparsity metric: percentage of shift values that are exactly zero, for every
# lasso weight / condition / library / mutation type combination
sparsity_df = shifts_comparison_pivoted.copy()
shift_values = sparsity_df["shift"]

# 0/1 indicator columns; a missing shift counts as neither zero nor nonzero
sparsity_df["zero shift"] = shift_values.eq(0).astype(int)
sparsity_df["nonzero shift"] = (shift_values.gt(0) | shift_values.lt(0)).astype(int)

# Count zero and nonzero shifts per group
sparsity_df = (
    sparsity_df
    .groupby(["lasso_shift", "condition", "library", "mutation type"])[
        ["zero shift", "nonzero shift"]
    ]
    .sum()
    .reset_index()
)
sparsity_df["total"] = sparsity_df["zero shift"] + sparsity_df["nonzero shift"]
sparsity_df["percent zero shift"] = (
    sparsity_df["zero shift"].div(sparsity_df["total"]).mul(100)
)

# Map lasso weights to integer numbers for easy plotting: each weight is drawn
# at its list position, so the unevenly spaced weights are evenly spaced on x
list_lasso_weights = [0, 0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005, 0.001]
lasso_weight_dict = {
    weight: position for position, weight in enumerate(list_lasso_weights)
}
# Tick labels, in the same order as `list_lasso_weights`
xticklabels = ["0", "1e-6", "5e-6", "1e-5", "5e-5", "1e-4", "5e-4", "1e-3"]
# x position of the highlight band: weight 1e-4 sits at position 5
chosen_lasso_position = lasso_weight_dict[0.0001]


def _finish_lasso_panel(panel, condition, ylim, yticks, ylabel, keep_legend):
    """Apply the formatting shared by every panel of the shrinkage figure.

    Parameters
    ----------
    panel
        Axes object returned by `sns.lineplot` for this panel.
    condition
        Condition name; the text between its first and second underscore is
        used in the panel title.
    ylim
        `(low, high)` y-axis limits.
    yticks
        y tick positions; their values are also used as the tick labels.
    ylabel
        y-axis label.
    keep_legend
        If True the legend is moved outside the panel and its entries 0 and 3
        are set in bold; otherwise the legend is removed.
    """
    panel.set_title(f"{condition.split('_')[1]} \u03B1-DG", fontsize=8)
    panel.set_ylim(*ylim)
    panel.xaxis.set_minor_locator(mticker.NullLocator())  # no minor ticks
    panel.set_xticks(list(lasso_weight_dict.values()))
    panel.set_xticklabels(xticklabels, size=8, rotation=90)
    panel.set_yticks(yticks)
    panel.set_yticklabels(panel.get_yticks(), size=8)
    panel.set(xlabel=None)
    panel.set_ylabel(ylabel, fontsize=8)

    # Change all spines
    for side in ["top", "bottom", "left", "right"]:
        panel.spines[side].set_linewidth(1)
    panel.tick_params(axis="both", length=3, width=1)
    sns.despine()

    # Make only one legend
    if keep_legend:
        sns.move_legend(
            panel,
            "upper left",
            bbox_to_anchor=(1.1, 1),
            fontsize=8,
            markerscale=1,
            handletextpad=0.2,
            frameon=False,
            borderaxespad=0.1,
        )
        legend_texts = panel.get_legend().get_texts()
        legend_texts[0].set_weight("bold")
        legend_texts[3].set_weight("bold")
    else:
        panel.get_legend().remove()

    # Draw vertical line at chosen lasso weight
    panel.axvline(
        x=chosen_lasso_position,
        color="#00000026",
        lw=10,
    )


# Top row: shift values; bottom row: sparsity. One column per condition, so
# the 2x2 grid assumes at most two conditions.
fig, axes = plt.subplots(
    2,
    2,
    figsize=(6.2, 3),
    sharex=True,
    sharey="row",
)

# Plot shift values
for i, condition in enumerate(shifts_comparison_pivoted["condition"].unique().tolist()):

    # Extract data for condition
    data = (
        shifts_comparison_pivoted.loc[
            (shifts_comparison_pivoted["condition"] == condition)
        ]
        .copy()
        .sort_values(by="mutation type")
        .reset_index()
    )
    data["lasso_shift"] = data["lasso_shift"].map(lasso_weight_dict)

    # One thin line per mutation (`units` with `estimator=None` disables
    # aggregation), colored by mutation type and styled by library
    plot = sns.lineplot(
        data=data,
        x="lasso_shift",
        y="shift",
        hue="mutation type",
        style="library",
        palette={"nonsense" : "#CC3311", "missense" : "#DDDDDD"},
        estimator=None,
        lw=0.25,
        units="mutation",
        ax=axes[0][i],
    )
    _finish_lasso_panel(
        plot,
        condition,
        ylim=(-3.5, 3.5),
        yticks=[-3, 0, 3],
        ylabel="shift",
        keep_legend=(i == 1),
    )

# Plot sparsity (ie % shift values = 0)
for i, condition in enumerate(sparsity_df["condition"].unique().tolist()):

    # Extract data for condition
    data = (
        sparsity_df.loc[
            (sparsity_df["condition"] == condition)
        ]
        .copy()
        .sort_values(by="mutation type")
        .reset_index(drop=True)
    )
    data["lasso_shift"] = data["lasso_shift"].map(lasso_weight_dict)

    plot = sns.lineplot(
        data=data,
        x="lasso_shift",
        y="percent zero shift",
        hue="mutation type",
        style="library",
        markers=["o", "s"],
        palette={"nonsense" : "#CC3311", "missense" : "#DDDDDD"},
        lw=1,
        markersize=4,
        markeredgewidth=0.5,
        markeredgecolor="black",
        ax=axes[1][i],
    )
    _finish_lasso_panel(
        plot,
        condition,
        ylim=(-5, 105),
        yticks=[0, 25, 50, 75, 100],
        ylabel="sparsity\n(% shift values = 0)",
        keep_legend=(i == 1),
    )

fig.tight_layout()

# Common x axis label (y labels are set per panel)
fig.text(0.5, 0, "lasso regularization weight", ha="center", rotation="horizontal", fontsize=8)

# Make output dir if doesn't exist; `makedirs` also creates missing parent
# directories and does not fail if the directory already exists
os.makedirs(html_dir, exist_ok=True)

fig.savefig(multidms_shrinkage_plot)

fig, ax = plt.subplots(figsize=(5,0.5))

# Extract data at the chosen lasso weight (1e-4) and average the median shift
# over all rows at each site, separately for every condition
chosen_lasso_weight_data = (
    shifts_comparison_pivoted.loc[
        (shifts_comparison_pivoted["lasso_shift"] == 0.0001)
    ]
    .groupby(["condition", "site", "wildtype"])
    .aggregate({"median_shift" : "mean"})
    .reset_index()
)

# Short ortholog names for the legend; conditions not listed here become NaN
chosen_lasso_weight_data["condition"] = (
    chosen_lasso_weight_data["condition"].map({
        "shift_human_aDG" : "human",
        "shift_mastomys_aDG" : "mastomys"
    })
)

# One semi-transparent line per ortholog along the sequence
plot = sns.lineplot(
    data=chosen_lasso_weight_data,
    x="site",
    y="median_shift",
    hue="condition",
    palette={"mastomys" : "#CC6677", "human" : "#1f78b4"},
    alpha=0.5,
    lw=1,
    ax=ax,
)
plot.set_title("lasso regularization weight: 1e-4", fontsize=8)
plot.set_xlim(0,491)
plot.set_xticks([100, 200, 300, 400])
plot.set_xticklabels(plot.get_xticks(), size=8, rotation=90)
plot.set_ylim(-0.125,0.125)
plot.set_yticks([-0.1, 0, 0.1])
plot.set_yticklabels(plot.get_yticks(), size=8)
plot.set_xlabel("site", fontsize=8)
plot.set_ylabel("site\nmean shift", fontsize=8)

# Change all spines
for axis in ["top", "bottom", "left", "right"]:
    plot.spines[axis].set_linewidth(1)
plot.tick_params(axis="both", length=3, width=1)
sns.despine()

# Place the legend outside the axes, with a bold title
sns.move_legend(
    plot, 
    "upper left", 
    bbox_to_anchor=(1.1, 1),
    fontsize=8,
    markerscale=1,
    handletextpad=0.2,
    title="\u03B1-DG ortholog",
    title_fontproperties = {
        "size" : 8, 
        "weight" : "bold",
    },
    frameon=False,
    borderaxespad=0.1,
) 

# Make output dir if doesn't exist; `makedirs` also creates missing parent
# directories and does not fail if the directory already exists
os.makedirs(html_dir, exist_ok=True)

fig.savefig(multidms_shift_profile)

# Load the averaged functional effects for the three cell lines
hek_df, human_df, mastomys_df = (
    pd.read_csv(csv_path)
    for csv_path in (HEK293T_data_path, humanDAG1_data_path, mastomysDAG1_data_path)
)

# Keep only mutations measured in all three cell lines. `validate` makes the
# merge fail if a mutation appears more than once on either side.
mutation_keys = ["site", "wildtype", "mutant"]
orthologs_df = pd.merge(
    human_df,
    mastomys_df,
    how="inner",
    on=mutation_keys,
    suffixes=["_human", "_mastomys"],
    validate="one_to_one",
)
# Columns coming from the 293T table arrive without a suffix, so the three
# used below are given an explicit `_HEK293T` suffix
merged_df = pd.merge(
    orthologs_df,
    hek_df,
    how="inner",
    on=mutation_keys,
    validate="one_to_one",
).rename(columns={
    "effect" : "effect_HEK293T",
    "times_seen" : "times_seen_HEK293T",
    "n_selections" : "n_selections_HEK293T",
})

# Mean times seen across the three cell lines
times_seen_by_cell_line = ["times_seen_human", "times_seen_mastomys", "times_seen_HEK293T"]
merged_df["average_times_seen"] = merged_df[times_seen_by_cell_line].mean(axis=1)

# Keep mutations with at least `n_selections` selections in every cell line
selection_count_cols = [
    "n_selections_human",
    "n_selections_mastomys",
    "n_selections_HEK293T",
]
merged_df = merged_df[merged_df[selection_count_cols].ge(n_selections).all(axis=1)]

# Calculate statistics: Pearson r and r^2 between cell lines, restricted to
# mutations whose average times seen is at least MTS.
# The filter is applied once and shared by all three comparisons.
well_sampled_df = merged_df.loc[merged_df["average_times_seen"] >= MTS]
for label, x_col, y_col in [
    ("human vs HEK", "effect_human", "effect_HEK293T"),
    ("mastomys vs HEK", "effect_mastomys", "effect_HEK293T"),
    ("mastomys vs human", "effect_mastomys", "effect_human"),
]:
    r, p = sp.stats.pearsonr(well_sampled_df[x_col], well_sampled_df[y_col])
    print(f"r correlation {label} (min_times_seen={MTS}): {r:.2f}")
    print(f"r^2 correlation {label} (min_times_seen={MTS}): {r**2:.2f}")

def _effect_scatter(df, x_cells, y_cells, times_seen_param, side):
    """Return one square scatter panel comparing effects between two cell lines.

    Parameters
    ----------
    df
        Data frame with `effect_*`, `times_seen_*` and `n_selections_*` columns
        for both cell lines, plus `site`, `wildtype`, `mutant` and
        `average_times_seen`.
    x_cells, y_cells
        Cell line plotted on each axis: "human", "mastomys" or "HEK293T".
    times_seen_param
        Altair parameter; points whose `average_times_seen` is below its value
        are drawn fully transparent.
    side
        Width and height of the panel in pixels.
    """
    axis_titles = {
        "human": ["effect on cell entry measured in", "293T\u0394DAG1+humanDAG1 cells"],
        "mastomys": ["effect on cell entry measured in", "293T\u0394DAG1+mastomysDAG1 cells"],
        "HEK293T": "effect on cell entry measured in 293T cells",
    }

    def effect_channel(channel, cells):
        # Both axes share the same ticks, domain and styling
        return channel(
            f"effect_{cells}",
            axis=alt.Axis(
                title=axis_titles[cells],
                values=[-5, -4, -3, -2, -1, 0, 1],
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
            scale=alt.Scale(domain=[-5.5, 1.5]),
        )

    return (
        alt.Chart(df)
        .mark_point(filled=True, color="black")
        .encode(
            effect_channel(alt.X, x_cells),
            effect_channel(alt.Y, y_cells),
            opacity=alt.condition(
                alt.datum.average_times_seen < times_seen_param,
                alt.value(0),
                alt.value(0.1),
            ),
            # Mutation identity, then the x cell line's fields, then the y's
            tooltip=[
                "site",
                "wildtype",
                "mutant",
                *(
                    f"{field}_{cells}"
                    for cells in (x_cells, y_cells)
                    for field in ("effect", "times_seen", "n_selections")
                ),
            ],
        )
        .properties(width=side, height=side)
    )


# Slider controlling the minimum average times seen for a point to be visible;
# it starts at MTS and only affects the display, not the statistics above
slider = alt.binding_range(min=1, max=25, step=1, name="times_seen")
selector = alt.param(name="SelectorName", value=MTS, bind=slider)

# Plot data
human_vs_hek = _effect_scatter(merged_df, "human", "HEK293T", selector, 300)
mastomys_vs_hek = _effect_scatter(merged_df, "mastomys", "HEK293T", selector, 300)
mastomys_vs_human = _effect_scatter(merged_df, "human", "mastomys", selector, 300)

corr_chart = alt.hconcat(
    human_vs_hek,
    mastomys_vs_hek,
    mastomys_vs_human, 
    spacing=5,
    title="Correlations of functional selections for DAG1 orthologs",
).add_params(
   selector
).configure_axis(
    grid=False,
    labelFontSize=16,
    titleFontSize=16,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_title(
    fontSize=24,
).configure_view(
    stroke=None
)

# Make output dir if doesn't exist; `makedirs` also creates missing parent
# directories and does not fail if the directory already exists
os.makedirs(html_dir, exist_ok=True)

print(f"Saving to {html_output}")
corr_chart.save(html_output)

corr_chart

r correlation human vs HEK (min_times_seen=2): 0.93
r^2 correlation human vs HEK (min_times_seen=2): 0.87
r correlation mastomys vs HEK (min_times_seen=2): 0.93
r^2 correlation mastomys vs HEK (min_times_seen=2): 0.87
r correlation mastomys vs human (min_times_seen=2): 0.94
r^2 correlation mastomys vs human (min_times_seen=2): 0.89

Saving to results/DAG1_ortholog_correlations/DAG1_ortholog_correlations.html

# Calculate statistics: Pearson r and r^2 between cell lines, restricted to
# mutations whose average times seen is at least MTS.
# The filter is applied once and shared by all three comparisons.
well_sampled_df = merged_df.loc[merged_df["average_times_seen"] >= MTS]
for label, x_col, y_col in [
    ("human vs HEK", "effect_human", "effect_HEK293T"),
    ("mastomys vs HEK", "effect_mastomys", "effect_HEK293T"),
    ("mastomys vs human", "effect_mastomys", "effect_human"),
]:
    r, p = sp.stats.pearsonr(well_sampled_df[x_col], well_sampled_df[y_col])
    print(f"r correlation {label} (min_times_seen={MTS}): {r:.2f}")
    print(f"r^2 correlation {label} (min_times_seen={MTS}): {r**2:.2f}")

def _effect_scatter(df, x_cells, y_cells, times_seen_param, side):
    """Return one square scatter panel comparing effects between two cell lines.

    Parameters
    ----------
    df
        Data frame with `effect_*`, `times_seen_*` and `n_selections_*` columns
        for both cell lines, plus `site`, `wildtype`, `mutant` and
        `average_times_seen`.
    x_cells, y_cells
        Cell line plotted on each axis: "human", "mastomys" or "HEK293T".
    times_seen_param
        Altair parameter; points whose `average_times_seen` is below its value
        are drawn fully transparent.
    side
        Width and height of the panel in pixels.
    """
    axis_titles = {
        "human": ["effect on cell entry measured in", "293T\u0394DAG1+humanDAG1 cells"],
        "mastomys": ["effect on cell entry measured in", "293T\u0394DAG1+mastomysDAG1 cells"],
        "HEK293T": "effect on cell entry measured in 293T cells",
    }

    def effect_channel(channel, cells):
        # Both axes share the same ticks, domain and styling
        return channel(
            f"effect_{cells}",
            axis=alt.Axis(
                title=axis_titles[cells],
                values=[-5, -4, -3, -2, -1, 0, 1],
                domainWidth=1,
                domainColor="black",
                tickColor="black",
            ),
            scale=alt.Scale(domain=[-5.5, 1.5]),
        )

    return (
        alt.Chart(df)
        .mark_point(filled=True, color="black")
        .encode(
            effect_channel(alt.X, x_cells),
            effect_channel(alt.Y, y_cells),
            opacity=alt.condition(
                alt.datum.average_times_seen < times_seen_param,
                alt.value(0),
                alt.value(0.1),
            ),
            # Mutation identity, then the x cell line's fields, then the y's
            tooltip=[
                "site",
                "wildtype",
                "mutant",
                *(
                    f"{field}_{cells}"
                    for cells in (x_cells, y_cells)
                    for field in ("effect", "times_seen", "n_selections")
                ),
            ],
        )
        .properties(width=side, height=side)
    )


# Slider controlling the minimum average times seen for a point to be visible;
# it starts at MTS and only affects the display, not the statistics above
slider = alt.binding_range(min=1, max=25, step=1, name="times_seen")
selector = alt.param(name="SelectorName", value=MTS, bind=slider)

# Plot data: the same three comparisons as the HTML chart, drawn as small
# 110 px panels with small fonts and points, and not saved to disk
human_vs_hek = _effect_scatter(merged_df, "human", "HEK293T", selector, 110)
mastomys_vs_hek = _effect_scatter(merged_df, "mastomys", "HEK293T", selector, 110)
mastomys_vs_human = _effect_scatter(merged_df, "human", "mastomys", selector, 110)

corr_chart = alt.hconcat(
    human_vs_hek,
    mastomys_vs_hek,
    mastomys_vs_human, 
    spacing=5
).add_params(
   selector
).configure_axis(
    grid=False,
    labelFontSize=8,
    titleFontSize=8,
    labelFontWeight="normal",
    titleFontWeight="normal",
).configure_point(
    size=10
).configure_view(
    stroke=None
)

corr_chart

r correlation human vs HEK (min_times_seen=2): 0.93
r^2 correlation human vs HEK (min_times_seen=2): 0.87
r correlation mastomys vs HEK (min_times_seen=2): 0.93
r^2 correlation mastomys vs HEK (min_times_seen=2): 0.87
r correlation mastomys vs human (min_times_seen=2): 0.94
r^2 correlation mastomys vs human (min_times_seen=2): 0.89

Analysis of functional selections on humanDAG1 and mastomysDAG1 cells

MultiDMS analysis

Correlation of functional effects between the different cell lines