Loading libraries

library(SingleCellExperiment)
library(ggplot2)
library(Matrix)
library(pheatmap)
library(ComplexHeatmap)
library(circlize)
library(scConvexNMF)
library(survminer)
library(survival)

Functions

#' Function to return object for ggplot2 plotting
#' @param size_text Size of the text to use for plotting
#' @param leg_pos Position of legend
#' @return ggplot theme object
theme_plot <- function(size_text=15,
                           leg_pos="right",
                           size_text_titles=NULL,
                           classic=FALSE,
                           line_size=0.85,
                           line_size_color="black",
                           axis_ticks_length=0.25,
                           axis_ticks_color="black",
                           rotate_x_axis=TRUE
                           ){
    require(ggplot2)
    if( is.null(size_text_titles) ){
        size_text_titles<- size_text + 2
    }
    if( classic ){
        gg.plots<- theme_classic()
    }else{
        gg.plots<- theme_bw()
    }
    gg.plots <- gg.plots +
        theme(axis.line=element_line(linewidth=line_size, colour=line_size_color),
              axis.ticks=element_line(colour=axis_ticks_color, linewidth=line_size),
              axis.ticks.length=unit(axis_ticks_length, "cm"),
              axis.title.x=element_text(size=size_text_titles),
              legend.position=leg_pos,
              plot.title=element_text(hjust=0.5, size=size_text),
              axis.text.y=element_text(size=size_text, color="black"),
              axis.title.y=element_text(size=size_text_titles) )
    if( rotate_x_axis) {
        gg.plots<- gg.plots +
            theme(axis.text.x=element_text(size=size_text, angle=45, hjust=1, color=line_size_color) )
    }else{
        gg.plots<- gg.plots +
            theme(axis.text.x = element_text(size=size_text, angle = 90, vjust = 0.5, hjust=1, color=line_size_color))
    }
}

pheatmap.colorsymmetric <- function(x,lim=NULL,...)
{
    require(pheatmap)
    if(is.null(lim)){
        lim <- max(abs(x), na.rm=TRUE)
        if( min(x, na.rm=TRUE) < 0 ){
            lim_down<- -lim
            col_palette<- colorRampPalette(c("blue","white","red"))(256)
        }else{        
            lim_down<- 0
            col_palette<- colorRampPalette(c("white","red"))(256)
        }
    }else{
        lim <- 1
        lim_down<- -1
        col_palette<- colorRampPalette(c("blue","white","red"))(256)        
    }
    pheatmap(
        x, color = col_palette,
        breaks=seq(lim_down,lim,length.out=255),
        #clustering_method="ward.D2",
        ... )
}

Loading data

The gene expression data can be found at the GEO GSE328575.

# Reading SCE object
sce<- readRDS("sce.rds")
meta<- data.frame(colData(sce))

Read scConvexNMF model for malignant cells, with can be found at Zenodo.

# Reading SCE object
model<- readRDS("scConvexNMF_rescoringModel_malignant.rds")

length(model$aux$cellIDs)
## [1] 44739
length(intersect(meta$Barcode_Sample, model$aux$cellIDs))
## [1] 44739
meta<- meta[model$aux$cellIDs,]

Read genesets

# all pathways
pathways<- readRDS("gene_sets.rds")
pathways<- pathways[,c(1:50, 55:70)]
pathways<- pathways[rowSums(pathways)>0,]

Read TCGA data

# Reading SCE object
lis_tcga<- readRDS("lis_tcga.rds")

meta_tc<- lis_tcga$meta

Read TCGA scConvex model

tcga_rescoringModel<- readRDS("scConvex_rescoringModel_TCGA_malignant.rds")

Heatmap of mean GP activities in TCGA

WGH_rescore <- getWGH_scCNMF(tcga_rescoringModel)

W_rescore<- WGH_rescore$W[rownames(meta_tc),]
# Remove conflicting LVs
lv_conflict<- c("LV19")
W_rescore<- W_rescore[,!colnames(W_rescore) %in% lv_conflict]

mean_lv<- data.frame(t(data.frame( t( apply(W_rescore, 2, function(vec) tapply( vec, meta_tc$Molecular_clusters, mean, na.rm=TRUE)) ), check.names=F )) )
colnames(mean_lv)<- sub("LV", "GP", colnames(mean_lv))

pheatmap.colorsymmetric(mean_lv,scale="column", clustering_method= "average")
## Warning: The input is a data frame, convert it to the matrix.
## Warning: It not suggested to both set `scale` and `breaks`. It makes the
## function confused.
## Warning: `breaks` does not have the same length as `color`. The colors are
## interpolated from the minimal to the maximal of `breaks`.

Violin plots of GP activities in TCGA

vec_lvs<- c("LV60", "LV49", "LV22", "LV57")

for( lv in vec_lvs){
    # seurat clusters
    ggdat<- data.frame(clusters=meta_tc$RNA_snn_res.4, metagene=W_rescore[,lv], Molecular_clusters=meta_tc$Molecular_clusters)
    ggp<- ggplot(
        ggdat,
        aes(x=Molecular_clusters,y=metagene, fill=Molecular_clusters)) +
        geom_violin(trim=T,scale="width") +
        labs(y=sub("LV", "GP", lv))  + 
        theme_plot(size_text=10, leg_pos="none", classic=T, rotate_x_axis=F)
    print(ggp)
    # ccRCC plot
    ggdat<- ggdat[ggdat$Molecular_clusters == "ccRCC",]
    ggdat$clusters<- factor(ggdat$clusters, levels=sort(unique(as.numeric(as.character(ggdat$clusters)))))    
    ggp<- ggplot(
        ggdat,
        aes(x=clusters,y=metagene, fill=clusters)) +
        geom_violin(trim=T,scale="width") +
        labs(y=sub("LV", "GP", lv))  + 
        theme_plot(size_text=10, leg_pos="none", classic=T, rotate_x_axis=F)
    print(ggp)
}

TCGA survival statistics

Reading TCGA statistics

lis_tcga_stats<- readRDS("lis_tcga_stats.rds")

survival_stats<- lis_tcga_stats$malignant_os

lim <- stats::quantile(abs(survival_stats$signed.logpadj),0.99, na.rm=T)

ggplot(survival_stats, aes(x = lv, y = log(exp.coef.), color = signed.logpadj)) +
    geom_point(size = 1) + # Points for HR
    geom_errorbar(aes(ymin = log(lower..95), ymax = log(upper..95)), width = 0.2) + # CI error bars
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") + # Reference line at HR = 1
    coord_flip() + # Flip coordinates for better readability
    labs(
        title = "log(Hazard Ratios) with 95% Confidence Intervals",
        x = "Variables",
        y = "log(Hazard Ratio)"
    ) +
    scale_color_gradientn( limits=c(-lim,lim), colours=c("blue","light grey","red"), oob=scales::squish ) +
    facet_wrap(~subtype, scales="free") +
    theme_plot(size_text=10, classic=T)

Survival plots

## Filter for ccRCC
cancer_type<- "ccRCC"
meta_use<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]

# Filter matrices
W_rescore_use<- W_rescore[rownames(meta_use),]
meta_use<- cbind(meta_use, W_rescore[rownames(meta_use),])

# Read summaries
ggdat<- survival_stats[survival_stats$subtype == cancer_type,]

############################
# LV60
############################
lv_use<- "GP60"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))    
fit <- survfit(Surv(OS.time, OS) ~ GP60.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)

############################
# LV12
############################
lv_use<- "GP12"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))    
fit <- survfit(Surv(OS.time, OS) ~ GP12.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)

## Filter for pRCC
cancer_type<- "pRCC"
meta_use<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]

# Filter matrices
W_rescore_use<- W_rescore[rownames(meta_use),]
meta_use<- cbind(meta_use, W_rescore[rownames(meta_use),])

# Read summaries
ggdat<- survival_stats[survival_stats$subtype == cancer_type,]

############################
# LV12
############################
lv_use<- "GP12"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))    
fit <- survfit(Surv(OS.time, OS) ~ GP12.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)

Overlap with pathways

WGH <- getWGH_scCNMF(model)
W<- WGH$W
W<- W[, colnames(W) != "LV19"]
vec_lvs<- colnames(W)
set.seed(14)
wJ_G <- weightedJaccard_G(model,pathways,nullSamples=1000)

wJ_G_plot <- visualize_jaccard(wJ_G, vec_lvs=c("LV12", "LV20", "LV59", "LV48", "LV60"), filterGeneSets=T, cutoff_padj = 0.1, cluster_rows=T, cluster_columns=F, fontsize_row=7, fontsize_col=7, cells_highlight_lwd=1)
draw(wJ_G_plot$heatmap)

Visualize expression

ht<- visualize_Yp(model, vec_lvs=c("LV12", "LV20", "LV59", "LV48", "LV60"), n_genes=10)
draw(ht)

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: AlmaLinux 9.6 (Sage Margay)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS IMKL;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Canada/Eastern
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] survival_3.5-7              survminer_0.5.0            
##  [3] ggpubr_0.6.0                scConvexNMF_0.0.0.9000     
##  [5] circlize_0.4.15             ComplexHeatmap_2.18.0      
##  [7] pheatmap_1.0.12             Matrix_1.6-4               
##  [9] ggplot2_3.5.2               SingleCellExperiment_1.24.0
## [11] SummarizedExperiment_1.32.0 Biobase_2.62.0             
## [13] GenomicRanges_1.54.1        GenomeInfoDb_1.38.5        
## [15] IRanges_2.36.0              S4Vectors_0.40.2           
## [17] BiocGenerics_0.48.1         MatrixGenerics_1.14.0      
## [19] matrixStats_1.2.0          
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7            gridExtra_2.3           rlang_1.1.6            
##  [4] magrittr_2.0.3          clue_0.3-65             GetoptLong_1.0.5       
##  [7] compiler_4.3.1          png_0.1-8               vctrs_0.6.5            
## [10] pkgconfig_2.0.3         shape_1.4.6             crayon_1.5.2           
## [13] fastmap_1.1.1           backports_1.4.1         magick_2.9.0           
## [16] XVector_0.42.0          labeling_0.4.3          KMsurv_0.1-6           
## [19] utf8_1.2.4              rmarkdown_2.29          purrr_1.0.2            
## [22] xfun_0.41               zlibbioc_1.48.0         cachem_1.0.8           
## [25] jsonlite_1.8.8          highr_0.10              DelayedArray_0.28.0    
## [28] broom_1.0.5             parallel_4.3.1          cluster_2.1.6          
## [31] R6_2.5.1                bslib_0.6.1             RColorBrewer_1.1-3     
## [34] car_3.1-2               jquerylib_0.1.4         Rcpp_1.0.12            
## [37] iterators_1.0.14        knitr_1.45              zoo_1.8-12             
## [40] splines_4.3.1           tidyselect_1.2.0        abind_1.4-5            
## [43] yaml_2.3.8              doParallel_1.0.17       codetools_0.2-19       
## [46] lattice_0.22-5          tibble_3.2.1            withr_2.5.2            
## [49] evaluate_0.23           survMisc_0.5.6          pillar_1.9.0           
## [52] carData_3.0-5           foreach_1.5.2           generics_0.1.3         
## [55] RCurl_1.98-1.14         munsell_0.5.0           scales_1.3.0           
## [58] xtable_1.8-4            glue_1.7.0              tools_4.3.1            
## [61] data.table_1.14.10      ggsignif_0.6.4          Cairo_1.6-2            
## [64] tidyr_1.3.0             colorspace_2.1-0        GenomeInfoDbData_1.2.11
## [67] cli_3.6.2               km.ci_0.5-6             fansi_1.0.6            
## [70] S4Arrays_1.2.0          dplyr_1.1.4             gtable_0.3.4           
## [73] rstatix_0.7.2           sass_0.4.8              digest_0.6.33          
## [76] SparseArray_1.2.3       rjson_0.2.21            farver_2.1.1           
## [79] htmltools_0.5.7         lifecycle_1.0.4         GlobalOptions_0.1.2