cell_entry_validations.ipynb¶

This notebook reads experimentally measured luciferase entry values (RLU/μL) for individual mutants and plots their correlation with the cell entry scores from deep mutational scanning (DMS).

  • Written by Brendan Larsen

Papermill parameters¶

In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# Every value defaults to None; papermill injects the real values in the next cell.

# Path to the Altair theme module.
# NOTE(review): not read anywhere in this notebook (the theme is imported from a
# hard-coded directory in the setup cell) — confirm whether it is still needed.
altair_config = None
# Path to the YAML configuration file loaded into `config`.
nipah_config = None
# CSV files with luciferase validation readings, one per cell line (read with pd.read_csv).
validation_file_E2 = None
validation_file_E3 = None

# CSV files with the DMS functional effects, one per cell line (read with pd.read_csv).
func_scores_E2_file = None
func_scores_E3_file = None

# Output paths for the saved Altair charts.
func_score_E2_plot = None
func_score_E3_plot = None
# Output path for the side-by-side chart. Also used as a sentinel: while it is None
# the notebook fills in default input paths and skips saving any chart.
corr_plots_combined = None
In [2]:
# Parameters
# Concrete values overriding the None defaults declared in the parameters cell above:
# configuration files, input CSVs, and output HTML paths for the charts.
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
validation_file_E2 = (
    "data/custom_analyses_data/experimental_data/functional_validations_EFNB2.csv"
)
validation_file_E3 = (
    "data/custom_analyses_data/experimental_data/functional_validations_EFNB3.csv"
)
func_scores_E2_file = "results/func_effects/averages/CHO_bEFNB2_func_effects.csv"
func_scores_E3_file = "results/func_effects/averages/CHO_bEFNB3_func_effects.csv"
func_score_E2_plot = "results/images/func_score_E2_validation.html"
func_score_E3_plot = "results/images/func_score_E3_validation.html"
corr_plots_combined = "results/images/corr_plots_combined.html"

Import packages, set working directory, import altair theme¶

In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
# All relative paths in this notebook are resolved against the project root.
project_dir = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"
# os.getcwd() never returns a trailing separator, so comparing it directly to a
# path that ends in "/" can never match; normalize both sides before comparing.
if os.path.normpath(os.getcwd()) == os.path.normpath(project_dir):
    print("Already in correct directory")
else:
    os.chdir(project_dir)
    print("Setup in correct directory")

# import altair themes from /data/custom_analyses_data/theme.py and enable
# The import must follow the chdir above because the appended path is relative.
sys.path.append('data/custom_analyses_data/')
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
Setup in correct directory
Out[3]:
ThemeRegistry.enable('main_theme')

Read config file¶

In [4]:
# Parse the top-level `config.yaml` into `config`.
# NOTE(review): `config` is reassigned from `nipah_config` in a later cell and is
# not read in between — confirm this load is still required.
with open("config.yaml") as config_handle:
    config = yaml.safe_load(config_handle)

For running interactively¶

In [5]:
# When papermill has not injected parameters, `corr_plots_combined` is still None.
# In that case fall back to the default input locations so the notebook can be run
# by hand; the plot output paths stay None, so no chart is written.
running_interactively = corr_plots_combined is None
if running_interactively:
    nipah_config = "nipah_config.yaml"
    validation_file_E2 = "data/custom_analyses_data/experimental_data/functional_validations_EFNB2.csv"
    validation_file_E3 = "data/custom_analyses_data/experimental_data/functional_validations_EFNB3.csv"
    func_scores_E2_file = "results/func_effects/averages/CHO_bEFNB2_func_effects.csv"
    func_scores_E3_file = "results/func_effects/averages/CHO_bEFNB3_func_effects.csv"

Load in config files¶

In [6]:
# Parse the YAML file named by `nipah_config`; this replaces any earlier `config`.
with open(nipah_config) as nipah_config_handle:
    config = yaml.safe_load(nipah_config_handle)

Import luciferase (RLUs/uL) readings for each mutant¶

In [7]:
# Load the luciferase readings for each cell line and give the shared
# `mean_luciferase` column a receptor-specific name.
# NOTE(review): `na_filter=None` is falsy, which presumably disables NA detection
# the same way `na_filter=False` would — confirm against the pandas docs.
func_validations_EFNB2 = pd.read_csv(validation_file_E2, na_filter=None)
func_validations_EFNB2 = func_validations_EFNB2.rename(
    columns={"mean_luciferase": "mean_luciferase_E2"}
)
func_validations_EFNB3 = pd.read_csv(validation_file_E3, na_filter=None)
func_validations_EFNB3 = func_validations_EFNB3.rename(
    columns={"mean_luciferase": "mean_luciferase_E3"}
)

# The two tables are joined side by side by row position below, and the EFNB3
# `mutation` column is discarded. Verify first that both files list the same
# mutations in the same order; otherwise readings would be silently paired with
# the wrong mutant.
if list(func_validations_EFNB2["mutation"]) != list(func_validations_EFNB3["mutation"]):
    raise ValueError(
        "EFNB2 and EFNB3 validation files do not list the same mutations in the "
        "same row order; cannot combine them by position."
    )

func_validations_EFNB3 = func_validations_EFNB3.drop("mutation", axis=1)
concat = pd.concat([func_validations_EFNB2, func_validations_EFNB3], axis=1)
display(concat.head(30))
mutation mean_luciferase_E2 mean_luciferase_E3
0 Q530F 13839 39157
1 Q530F 11669 38929
2 Q530F 12583 32625
3 Y389T 9223 18
4 Y389T 9564 21
5 Y389T 6327 30
6 Q530E 8952 3
7 Q530E 4825 2
8 Q530E 7865 1
9 P488S 12841 2012
10 P488S 1023 155
11 P488S 9621 1672
12 Q530L 13695 5601
13 Q530L 13589 5730
14 Q530L 11817 5136
15 C162L 1 30
16 C162L 4 9
17 C162L 1 20
18 F168A 7771 24956
19 F168A 9860 29080
20 F168A 6493 23138
21 Unmutated 8775 24502
22 Unmutated 9407 23416
23 Unmutated 9936 32370
24 Q492L 19403 42769
25 Q492L 10533 36135
26 Q492L 11757 46212

Import the DMS functional scores and build a `mutation` column matching the validation data frame so the two can be merged¶

In [8]:
# Read the DMS functional-effect table for each cell line and add a `mutation`
# label (wildtype + site + mutant, e.g. "Q530F") to use as the merge key.
dms_tables = {}
for receptor, scores_path in (("E2", func_scores_E2_file), ("E3", func_scores_E3_file)):
    scores_table = pd.read_csv(scores_path)
    scores_table["mutation"] = (
        scores_table["wildtype"] + scores_table["site"].astype(str) + scores_table["mutant"]
    )
    dms_tables[receptor] = scores_table
func_scores = dms_tables["E2"]
func_scores_E3 = dms_tables["E3"]

# Put the E2 and E3 scores side by side; overlapping columns get _E2/_E3 suffixes.
func_scores_merged = pd.merge(
    func_scores, func_scores_E3, on=["mutation"], how="left", suffixes=["_E2", "_E3"]
)
# Attach the DMS scores to every luciferase reading (left join keeps all readings).
merged = pd.merge(concat, func_scores_merged, on=["mutation"], how="left")

# Give the unmutated control a near-zero effect in both cell lines.
# NOTE(review): the original comment justified the non-zero value by log-scale
# plotting, but the effect columns are drawn on a linear x-axis below — confirm.
is_unmutated = merged["mutation"] == "Unmutated"
merged.loc[is_unmutated, ["effect_E2", "effect_E3"]] = 0.0000001

Now Plot Correlations¶

CHO-EFNB2 Entry Correlations¶

In [9]:
# Pearson r between the DMS effect and the luciferase reading on CHO-bEFNB2.
# NOTE(review): r is computed on untransformed RLU values, while the chart below
# uses a log-scaled y-axis — confirm this is intended.
regression_E2 = scipy.stats.linregress(
    merged["effect_E2"], merged["mean_luciferase_E2"]
)
slope, intercept, r_value, p_value, std_err = regression_E2
r_value = float(r_value)


# Legend ordering helper: the unmutated control first, then mutants by site number
def custom_sort_order(array):
    """Return the labels as a new list ordered for the legend.

    Labels are sorted by the first run of digits they contain (e.g. 530 for
    'Q530F'); labels without digits sort as 0. If 'Unmutated' is present, its
    first occurrence is moved to the front. The input is not modified.
    """
    site_pattern = re.compile(r"\d+")

    def site_number(label):
        found = site_pattern.search(label)
        if found is None:
            return 0
        return int(found.group())

    ordered = sorted(array, key=site_number)

    # Pull the unmutated control to the head of the list
    if "Unmutated" in ordered:
        ordered.insert(0, ordered.pop(ordered.index("Unmutated")))
    return ordered


# Ten-color categorical palette defined manually.
# The variable keeps its historical name, but these hex values match Vega's
# `tableau10` scheme rather than `category10`.
category10_colors = [
    "#4e79a7",
    "#f28e2c",
    "#e15759",
    "#76b7b2",
    "#59a14f",
    "#edc949",
    "#af7aa1",
    "#ff9da7",
    "#9c755f",
    "#bab0ab",
]

# Legend order, computed once and shared by the color and shape scales so the
# two encodings cannot drift apart.
mutation_order_E2 = custom_sort_order(merged["mutation"].unique())

# Black for the first legend entry, palette colors for the remaining mutations
colors = ["black"] + category10_colors[: len(mutation_order_E2) - 1]

# Scatter plot: DMS effect (linear x) against luciferase reading (log10 y)
corr_chart = (
    alt.Chart(merged, title=alt.Title("CHO-bEFNB2"))
    .encode(
        x=alt.X(
            "effect_E2:Q",
            title="Cell entry in DMS",
            scale=alt.Scale(domain=[-4, 1]),
            axis=alt.Axis(values=[-4, -3, -2, -1, 0, 1], tickCount=6),
        ),
        y=alt.Y(
            "mean_luciferase_E2",
            title="RLU/μL in validation assay",
            scale=alt.Scale(type="log", base=10),
            axis=alt.Axis(format=".0e", tickCount=4),  # scientific notation ticks
        ),
        color=alt.Color(
            "mutation",
            title="Virus",
            scale=alt.Scale(domain=mutation_order_E2, range=colors),
        ),
        shape=alt.Shape("mutation", scale=alt.Scale(domain=mutation_order_E2)),
        tooltip=["mutation", "effect_E2", "mean_luciferase_E2"],
    )
    .mark_point(size=125, filled=True, opacity=0.5)
)

# Anchor for the "r = ..." label: smallest effect (truncated to an integer) and
# largest luciferase reading, i.e. the top-left region of the data.
min_effect_E2 = int(merged["effect_E2"].min())
max_mean_luciferase_E2 = int(merged["mean_luciferase_E2"].max())

text = (
    alt.Chart(
        {
            "values": [
                {
                    "x": min_effect_E2,
                    "y": max_mean_luciferase_E2,
                    "text": f"r = {r_value:.2f}",
                }
            ]
        }
    )
    .mark_text(
        align="left",
        baseline="top",
        dx=-30,  # pixel offset of the label from its anchor
        dy=-30,
    )
    .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
)

# Layer the label over the scatter plot
final_chart = corr_chart + text
final_chart = final_chart.properties(height=200, width=200).resolve_scale(
    shape="independent", color="independent"
)

func_score_E2_chart = final_chart
func_score_E2_chart.display()
# Save only when an output path for this chart was actually provided; the
# previous guard tested `corr_plots_combined`, which is a different output.
if func_score_E2_plot is not None:
    func_score_E2_chart.save(func_score_E2_plot)

CHO-EFNB3 entry correlations plot¶

In [10]:
# Pearson r between the DMS effect and the luciferase reading on CHO-bEFNB3.
# NOTE(review): r is computed on untransformed RLU values, while the chart below
# uses a log-scaled y-axis — confirm this is intended.
regression_E3 = scipy.stats.linregress(
    merged["effect_E3"], merged["mean_luciferase_E3"]
)
slope, intercept, r_value, p_value, std_err = regression_E3
r_value = float(r_value)


# Sorting function to put 'Unmutated' on top of the legend, followed by numerical order
def custom_sort_order(array):
    """Return the labels as a new list ordered for the legend.

    Labels are sorted by the first run of digits they contain (e.g. 530 for
    'Q530F'); labels without digits sort as 0. The control label 'Unmutated'
    is then moved to the front.

    This redefinition previously looked for 'WT', a label that does not occur
    in the data, so it behaved differently from the definition in the
    CHO-bEFNB2 cell that it shadows. It now uses the same 'Unmutated' label.
    """

    # Sort based on the numerical part in mutation strings
    def extract_number(mutation):
        num = re.search(r"\d+", mutation)
        return int(num.group()) if num else 0

    array = sorted(array, key=extract_number)

    # Move 'Unmutated' to the beginning of the list
    if "Unmutated" in array:
        array.remove("Unmutated")
        array.insert(0, "Unmutated")
    return array

# Legend order, computed once and shared by the color and shape scales so the
# two encodings cannot drift apart.
mutation_order_E3 = custom_sort_order(merged["mutation"].unique())

# Black for the first legend entry, palette colors for the remaining mutations
colors = ["black"] + category10_colors[: len(mutation_order_E3) - 1]

# Scatter plot: DMS effect (linear x) against luciferase reading (log10 y)
corr_chart = (
    alt.Chart(merged, title=alt.Title("CHO-bEFNB3"))
    .encode(
        x=alt.X(
            "effect_E3:Q",
            title="Cell entry in DMS",
            scale=alt.Scale(domain=[-4, 1]),
            axis=alt.Axis(values=[-4, -3, -2, -1, 0, 1], tickCount=6),
        ),
        y=alt.Y(
            "mean_luciferase_E3",
            title="RLU/μL in validation assay",
            scale=alt.Scale(type="log", base=10),
            axis=alt.Axis(format=".0e", tickCount=4),  # Display in scientific notation
        ),
        color=alt.Color(
            "mutation",
            title="Virus",
            scale=alt.Scale(domain=mutation_order_E3, range=colors),
        ),
        tooltip=["mutation", "effect_E3", "mean_luciferase_E3"],
        shape=alt.Shape("mutation", scale=alt.Scale(domain=mutation_order_E3)),
    )
    .mark_point(size=125, filled=True, opacity=0.5)
)

# Anchor for the "r = ..." label: smallest effect (truncated to an integer) and
# largest luciferase reading, i.e. the top-left region of the data.
min_effect_E3 = int(merged["effect_E3"].min())
max_mean_luciferase_E3 = int(merged["mean_luciferase_E3"].max())

text = (
    alt.Chart(
        {
            "values": [
                {
                    "x": min_effect_E3,
                    "y": max_mean_luciferase_E3,
                    "text": f"r = {r_value:.2f}",
                }
            ]
        }
    )
    .mark_text(
        align="left",
        baseline="top",
        dx=-30,  # Adjust this for position
        dy=-15,  # Adjust this for position
    )
    .encode(x=alt.X("x:Q"), y=alt.Y("y:Q"), text="text:N")
)

# Layer the label over the scatter plot
final_chart = (corr_chart + text).resolve_scale(shape="independent", color="independent")
final_chart = final_chart.properties(width=200, height=200)
func_score_E3_chart = final_chart
func_score_E3_chart.display()
# Save only when an output path for this chart was actually provided; the
# previous guard tested `corr_plots_combined`, which is a different output.
if func_score_E3_plot is not None:
    func_score_E3_chart.save(func_score_E3_plot)
In [11]:
# Combine the plots
# Build the side-by-side figure once so that the chart written to disk is exactly
# the chart displayed; previously the saved copy was rebuilt without the
# resolve_scale(color='shared') call applied to the displayed one.
combined_chart = (func_score_E2_chart | func_score_E3_chart).resolve_scale(color='shared')
combined_chart.display()
if corr_plots_combined is not None:
    combined_chart.save(corr_plots_combined)
In [ ]: