library(SingleCellExperiment)
library(ggplot2)
library(Matrix)
library(pheatmap)
library(ComplexHeatmap)
library(circlize)
library(scConvexNMF)
library(survminer)
library(survival)
#' Function to return object for ggplot2 plotting
#' @param size_text Size of the text to use for plotting
#' @param leg_pos Position of legend
#' @return ggplot theme object
theme_plot <- function(size_text=15,
leg_pos="right",
size_text_titles=NULL,
classic=FALSE,
line_size=0.85,
line_size_color="black",
axis_ticks_length=0.25,
axis_ticks_color="black",
rotate_x_axis=TRUE
){
require(ggplot2)
if( is.null(size_text_titles) ){
size_text_titles<- size_text + 2
}
if( classic ){
gg.plots<- theme_classic()
}else{
gg.plots<- theme_bw()
}
gg.plots <- gg.plots +
theme(axis.line=element_line(linewidth=line_size, colour=line_size_color),
axis.ticks=element_line(colour=axis_ticks_color, linewidth=line_size),
axis.ticks.length=unit(axis_ticks_length, "cm"),
axis.title.x=element_text(size=size_text_titles),
legend.position=leg_pos,
plot.title=element_text(hjust=0.5, size=size_text),
axis.text.y=element_text(size=size_text, color="black"),
axis.title.y=element_text(size=size_text_titles) )
if( rotate_x_axis) {
gg.plots<- gg.plots +
theme(axis.text.x=element_text(size=size_text, angle=45, hjust=1, color=line_size_color) )
}else{
gg.plots<- gg.plots +
theme(axis.text.x = element_text(size=size_text, angle = 90, vjust = 0.5, hjust=1, color=line_size_color))
}
}
pheatmap.colorsymmetric <- function(x,lim=NULL,...)
{
require(pheatmap)
if(is.null(lim)){
lim <- max(abs(x), na.rm=TRUE)
if( min(x, na.rm=TRUE) < 0 ){
lim_down<- -lim
col_palette<- colorRampPalette(c("blue","white","red"))(256)
}else{
lim_down<- 0
col_palette<- colorRampPalette(c("white","red"))(256)
}
}else{
lim <- 1
lim_down<- -1
col_palette<- colorRampPalette(c("blue","white","red"))(256)
}
pheatmap(
x, color = col_palette,
breaks=seq(lim_down,lim,length.out=255),
#clustering_method="ward.D2",
... )
}
The gene expression data can be found at the GEO GSE328575.
# Reading SCE object
sce<- readRDS("sce.rds")
meta<- data.frame(colData(sce))
Read scConvexNMF model for malignant cells, with can be found at Zenodo.
# Reading SCE object
model<- readRDS("scConvexNMF_rescoringModel_malignant.rds")
length(model$aux$cellIDs)
## [1] 44739
length(intersect(meta$Barcode_Sample, model$aux$cellIDs))
## [1] 44739
meta<- meta[model$aux$cellIDs,]
Read genesets
# all pathways
pathways<- readRDS("gene_sets.rds")
pathways<- pathways[,c(1:50, 55:70)]
pathways<- pathways[rowSums(pathways)>0,]
Read TCGA data
# Reading SCE object
lis_tcga<- readRDS("lis_tcga.rds")
meta_tc<- lis_tcga$meta
Read TCGA scConvex model
tcga_rescoringModel<- readRDS("scConvex_rescoringModel_TCGA_malignant.rds")
WGH_rescore <- getWGH_scCNMF(tcga_rescoringModel)
W_rescore<- WGH_rescore$W[rownames(meta_tc),]
# Remove conflicting LVs
lv_conflict<- c("LV19")
W_rescore<- W_rescore[,!colnames(W_rescore) %in% lv_conflict]
mean_lv<- data.frame(t(data.frame( t( apply(W_rescore, 2, function(vec) tapply( vec, meta_tc$Molecular_clusters, mean, na.rm=TRUE)) ), check.names=F )) )
colnames(mean_lv)<- sub("LV", "GP", colnames(mean_lv))
pheatmap.colorsymmetric(mean_lv,scale="column", clustering_method= "average")
## Warning: The input is a data frame, convert it to the matrix.
## Warning: It not suggested to both set `scale` and `breaks`. It makes the
## function confused.
## Warning: `breaks` does not have the same length as `color`. The colors are
## interpolated from the minimal to the maximal of `breaks`.
vec_lvs<- c("LV60", "LV49", "LV22", "LV57")
for( lv in vec_lvs){
# seurat clusters
ggdat<- data.frame(clusters=meta_tc$RNA_snn_res.4, metagene=W_rescore[,lv], Molecular_clusters=meta_tc$Molecular_clusters)
ggp<- ggplot(
ggdat,
aes(x=Molecular_clusters,y=metagene, fill=Molecular_clusters)) +
geom_violin(trim=T,scale="width") +
labs(y=sub("LV", "GP", lv)) +
theme_plot(size_text=10, leg_pos="none", classic=T, rotate_x_axis=F)
print(ggp)
# ccRCC plot
ggdat<- ggdat[ggdat$Molecular_clusters == "ccRCC",]
ggdat$clusters<- factor(ggdat$clusters, levels=sort(unique(as.numeric(as.character(ggdat$clusters)))))
ggp<- ggplot(
ggdat,
aes(x=clusters,y=metagene, fill=clusters)) +
geom_violin(trim=T,scale="width") +
labs(y=sub("LV", "GP", lv)) +
theme_plot(size_text=10, leg_pos="none", classic=T, rotate_x_axis=F)
print(ggp)
}
Reading TCGA statistics
lis_tcga_stats<- readRDS("lis_tcga_stats.rds")
survival_stats<- lis_tcga_stats$malignant_os
lim <- stats::quantile(abs(survival_stats$signed.logpadj),0.99, na.rm=T)
ggplot(survival_stats, aes(x = lv, y = log(exp.coef.), color = signed.logpadj)) +
geom_point(size = 1) + # Points for HR
geom_errorbar(aes(ymin = log(lower..95), ymax = log(upper..95)), width = 0.2) + # CI error bars
geom_hline(yintercept = 0, linetype = "dashed", color = "red") + # Reference line at HR = 1
coord_flip() + # Flip coordinates for better readability
labs(
title = "log(Hazard Ratios) with 95% Confidence Intervals",
x = "Variables",
y = "log(Hazard Ratio)"
) +
scale_color_gradientn( limits=c(-lim,lim), colours=c("blue","light grey","red"), oob=scales::squish ) +
facet_wrap(~subtype, scales="free") +
theme_plot(size_text=10, classic=T)
## Filter for ccRCC
cancer_type<- "ccRCC"
meta_use<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]
# Filter matrices
W_rescore_use<- W_rescore[rownames(meta_use),]
meta_use<- cbind(meta_use, W_rescore[rownames(meta_use),])
# Read summaries
ggdat<- survival_stats[survival_stats$subtype == cancer_type,]
############################
# LV60
############################
lv_use<- "GP60"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))
fit <- survfit(Surv(OS.time, OS) ~ GP60.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)
############################
# LV12
############################
lv_use<- "GP12"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))
fit <- survfit(Surv(OS.time, OS) ~ GP12.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)
## Filter for pRCC
cancer_type<- "pRCC"
meta_use<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]
# Filter matrices
W_rescore_use<- W_rescore[rownames(meta_use),]
meta_use<- cbind(meta_use, W_rescore[rownames(meta_use),])
# Read summaries
ggdat<- survival_stats[survival_stats$subtype == cancer_type,]
############################
# LV12
############################
lv_use<- "GP12"
lv_trans<- sub("GP", "LV", lv_use)
quantile_use<- as.numeric(ggdat[ggdat$lv == lv_use,"quant"])
meta_use[,paste0(lv_use, ".class")]<- ifelse(meta_use[,lv_trans] > quantile(meta_use[,lv_trans], quantile_use), "high", "low")
meta_use[,paste0(lv_use, ".class")]<- factor(meta_use[,paste0(lv_use, ".class")], levels=c("high", "low" ))
fit <- survfit(Surv(OS.time, OS) ~ GP12.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = FALSE, conf.int = TRUE, pval=FALSE)
WGH <- getWGH_scCNMF(model)
W<- WGH$W
W<- W[, colnames(W) != "LV19"]
vec_lvs<- colnames(W)
set.seed(14)
wJ_G <- weightedJaccard_G(model,pathways,nullSamples=1000)
wJ_G_plot <- visualize_jaccard(wJ_G, vec_lvs=c("LV12", "LV20", "LV59", "LV48", "LV60"), filterGeneSets=T, cutoff_padj = 0.1, cluster_rows=T, cluster_columns=F, fontsize_row=7, fontsize_col=7, cells_highlight_lwd=1)
draw(wJ_G_plot$heatmap)
ht<- visualize_Yp(model, vec_lvs=c("LV12", "LV20", "LV59", "LV48", "LV60"), n_genes=10)
draw(ht)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: AlmaLinux 9.6 (Sage Margay)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS IMKL; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
## [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
## [7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Canada/Eastern
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] survival_3.5-7 survminer_0.5.0
## [3] ggpubr_0.6.0 scConvexNMF_0.0.0.9000
## [5] circlize_0.4.15 ComplexHeatmap_2.18.0
## [7] pheatmap_1.0.12 Matrix_1.6-4
## [9] ggplot2_3.5.2 SingleCellExperiment_1.24.0
## [11] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [13] GenomicRanges_1.54.1 GenomeInfoDb_1.38.5
## [15] IRanges_2.36.0 S4Vectors_0.40.2
## [17] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [19] matrixStats_1.2.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 gridExtra_2.3 rlang_1.1.6
## [4] magrittr_2.0.3 clue_0.3-65 GetoptLong_1.0.5
## [7] compiler_4.3.1 png_0.1-8 vctrs_0.6.5
## [10] pkgconfig_2.0.3 shape_1.4.6 crayon_1.5.2
## [13] fastmap_1.1.1 backports_1.4.1 magick_2.9.0
## [16] XVector_0.42.0 labeling_0.4.3 KMsurv_0.1-6
## [19] utf8_1.2.4 rmarkdown_2.29 purrr_1.0.2
## [22] xfun_0.41 zlibbioc_1.48.0 cachem_1.0.8
## [25] jsonlite_1.8.8 highr_0.10 DelayedArray_0.28.0
## [28] broom_1.0.5 parallel_4.3.1 cluster_2.1.6
## [31] R6_2.5.1 bslib_0.6.1 RColorBrewer_1.1-3
## [34] car_3.1-2 jquerylib_0.1.4 Rcpp_1.0.12
## [37] iterators_1.0.14 knitr_1.45 zoo_1.8-12
## [40] splines_4.3.1 tidyselect_1.2.0 abind_1.4-5
## [43] yaml_2.3.8 doParallel_1.0.17 codetools_0.2-19
## [46] lattice_0.22-5 tibble_3.2.1 withr_2.5.2
## [49] evaluate_0.23 survMisc_0.5.6 pillar_1.9.0
## [52] carData_3.0-5 foreach_1.5.2 generics_0.1.3
## [55] RCurl_1.98-1.14 munsell_0.5.0 scales_1.3.0
## [58] xtable_1.8-4 glue_1.7.0 tools_4.3.1
## [61] data.table_1.14.10 ggsignif_0.6.4 Cairo_1.6-2
## [64] tidyr_1.3.0 colorspace_2.1-0 GenomeInfoDbData_1.2.11
## [67] cli_3.6.2 km.ci_0.5-6 fansi_1.0.6
## [70] S4Arrays_1.2.0 dplyr_1.1.4 gtable_0.3.4
## [73] rstatix_0.7.2 sass_0.4.8 digest_0.6.33
## [76] SparseArray_1.2.3 rjson_0.2.21 farver_2.1.1
## [79] htmltools_0.5.7 lifecycle_1.0.4 GlobalOptions_0.1.2