library(SingleCellExperiment)
library(ggplot2)
library(Matrix)
library(pheatmap)
library(RColorBrewer)
library(uwot)
library(lsa)
library(GEDI)
library(batchelor)
set.seed(43)
#' Function to return object for ggplot2 plotting
#' @param size_text Size of the text to use for plotting
#' @param leg_pos Position of legend
#' @return ggplot theme object
theme_plot <- function(size_text=15,
leg_pos="right",
size_text_titles=NULL,
classic=FALSE,
line_size=0.85,
line_size_color="black",
axis_ticks_length=0.25,
axis_ticks_color="black",
rotate_x_axis=TRUE
){
require(ggplot2)
if( is.null(size_text_titles) ){
size_text_titles<- size_text + 2
}
if( classic ){
gg.plots<- theme_classic()
}else{
gg.plots<- theme_bw()
}
gg.plots <- gg.plots +
theme(axis.line=element_line(linewidth=line_size, colour=line_size_color),
axis.ticks=element_line(colour=axis_ticks_color, linewidth=line_size),
axis.ticks.length=unit(axis_ticks_length, "cm"),
axis.title.x=element_text(size=size_text_titles),
legend.position=leg_pos,
plot.title=element_text(hjust=0.5, size=size_text),
axis.text.y=element_text(size=size_text, color="black"),
axis.title.y=element_text(size=size_text_titles) )
if( rotate_x_axis) {
gg.plots<- gg.plots +
theme(axis.text.x=element_text(size=size_text, angle=45, hjust=1, color=line_size_color) )
}else{
gg.plots<- gg.plots +
theme(axis.text.x = element_text(size=size_text, angle = 90, vjust = 0.5, hjust=1, color=line_size_color))
}
}
The gene expression data can be found at the GEO GSE328575.
# Reading SCE object
sce<- readRDS("sce.rds")
meta_sc<- colData(sce)
Read TCGA data The TCGA data can be found at Zenodo.
# Reading SCE object
lis_tcga<- readRDS("lis_tcga.rds")
meta_tc<- lis_tcga$meta
x<- meta_tc$ccpRCT_sig
meta_tc$scale_ccpRCT_sig<- (x - min(x) ) / ( max(x) - min(x) )
ggplot(meta_tc, aes(Molecular_clusters, scale_ccpRCT_sig, fill=Molecular_clusters) ) +
geom_violin(trim=T,scale="width") +
theme_plot(rotate_x_axis=F, classic=T)
We will also load the GEDI model for the integrated tumor data, which can be accessed in here.
model<- readRDS("gedi_model_rcc_tcga.rds")
tcga_indices <- 1:model$aux$Ni[[1]]
RCC_indices <- (model$aux$Ni[[1]]+1):(sum(model$aux$Ni))
# Reorder metadatas
meta_tc<- meta_tc[model$aux$cellIDs[tcga_indices],]
meta_sc<- meta_sc[model$aux$cellIDs[RCC_indices],]
Calculate the mean embedding vector per TCGA cluster, followed by cosine similarity of each single cell to that vector.
target<- 22
snn_clusters <- meta_tc$RNA_snn_res.4
names(snn_clusters)<- rownames(meta_tc)
reference_vector <- rowMeans(model$params$Bi[[1]][,which(snn_clusters==target)])
predicted_scores <- apply(model$params$Bi[[2]],2,cosine,y=reference_vector)
meta_sc$subtype<- factor(meta_sc$subtype, levels=c("ccRCC", "pRCC", "uRCC", "chRCC", "ccpRCT") )
ggplot(data.frame(x=meta_sc$subtype, y=predicted_scores), aes(x=x,y=y, fill=x)) +
geom_violin(trim=T,scale="width") +
ggtitle(target) +
theme_plot(rotate_x_axis=F, classic=T)
Inversely, define the reference vector based on scRNA-seq data, and
calculate cosine similarity with TCGA samples
predicted_scores <- apply(model$params$Bi[[1]],2,cosine,y=reference_vector)
# Violin plot
ggdat<- data.frame(x=snn_clusters, y=predicted_scores, subtype=meta_tc$Molecular_clusters)
ggplot(ggdat, aes(x=subtype,y=y, fill=subtype)) +
geom_violin(trim=T,scale="width") +
theme_plot(rotate_x_axis=F, classic=T)
sce<- multiBatchNorm(sce, batch=sce$Sample)
norm_counts<- as.matrix( assay(sce, "logcounts") )
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 19.4 GiB
meta_sc<- data.frame(colData(sce))
# Filter for only tumor
meta_sc<- meta_sc[meta_sc$class == "patient",]
meta_sc$cell_subtype<- meta_sc$cell_type
meta_sc$cell_subtype[meta_sc$cell_type == "Malignant" & meta_sc$subtype == "ccpRCT"]<- "Malignant.ccpRCT"
meta_sc$cell_subtype[meta_sc$cell_type == "Malignant" & meta_sc$subtype == "ccRCC"]<- "Malignant.ccRCC"
meta_sc$cell_subtype[meta_sc$cell_type == "Malignant" & meta_sc$subtype == "chRCC"]<- "Malignant.chRCC"
meta_sc$cell_subtype[meta_sc$cell_type == "Malignant" & meta_sc$subtype == "pRCC"]<- "Malignant.pRCC"
meta_sc$cell_subtype[meta_sc$cell_type == "Malignant" & meta_sc$subtype == "uRCC"]<- "Malignant.uRCC"
norm_counts<- norm_counts[,rownames(meta_sc)]
df_plot<- data.frame(expr=norm_counts["CASP14",],
cell_subtype=meta_sc$cell_subtype)
ggplot(df_plot, aes(cell_subtype, expr, fill=cell_subtype) ) +
geom_violin(trim=T,scale="width") +
theme_plot(rotate_x_axis=F, classic=T)
meta_tc<- lis_tcga$meta
norm_data<- lis_tcga$norm_data
df_plot<- data.frame(expr=norm_data["CASP14",],
Molecular_subtype=meta_tc$Molecular_clusters)
ggplot(df_plot, aes(Molecular_subtype, expr, fill=Molecular_subtype) ) +
geom_violin(trim=T,scale="width") +
theme_plot(rotate_x_axis=F, classic=T)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: AlmaLinux 9.6 (Sage Margay)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS IMKL; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
## [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
## [7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Canada/Eastern
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] batchelor_1.18.1 GEDI_1.0.2
## [3] lsa_0.73.3 SnowballC_0.7.1
## [5] uwot_0.1.16 RColorBrewer_1.1-3
## [7] pheatmap_1.0.12 Matrix_1.6-4
## [9] ggplot2_3.5.2 SingleCellExperiment_1.24.0
## [11] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [13] GenomicRanges_1.54.1 GenomeInfoDb_1.38.5
## [15] IRanges_2.36.0 S4Vectors_0.40.2
## [17] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [19] matrixStats_1.2.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.4 xfun_0.41
## [3] bslib_0.6.1 lattice_0.22-5
## [5] vctrs_0.6.5 tools_4.3.1
## [7] bitops_1.0-7 generics_0.1.3
## [9] parallel_4.3.1 tibble_3.2.1
## [11] fansi_1.0.6 highr_0.10
## [13] pkgconfig_2.0.3 BiocNeighbors_1.20.2
## [15] sparseMatrixStats_1.14.0 lifecycle_1.0.4
## [17] GenomeInfoDbData_1.2.11 farver_2.1.1
## [19] compiler_4.3.1 munsell_0.5.0
## [21] codetools_0.2-19 htmltools_0.5.7
## [23] sass_0.4.8 RCurl_1.98-1.14
## [25] yaml_2.3.8 pillar_1.9.0
## [27] crayon_1.5.2 jquerylib_0.1.4
## [29] BiocParallel_1.36.0 DelayedArray_0.28.0
## [31] cachem_1.0.8 abind_1.4-5
## [33] rsvd_1.0.5 tidyselect_1.2.0
## [35] digest_0.6.33 BiocSingular_1.18.0
## [37] dplyr_1.1.4 labeling_0.4.3
## [39] fastmap_1.1.1 grid_4.3.1
## [41] colorspace_2.1-0 cli_3.6.2
## [43] SparseArray_1.2.3 magrittr_2.0.3
## [45] S4Arrays_1.2.0 utf8_1.2.4
## [47] withr_2.5.2 DelayedMatrixStats_1.24.0
## [49] scales_1.3.0 rmarkdown_2.29
## [51] XVector_0.42.0 igraph_2.1.4
## [53] ResidualMatrix_1.12.0 RcppEigen_0.3.3.9.4
## [55] beachmat_2.18.0 ScaledMatrix_1.10.0
## [57] evaluate_0.23 knitr_1.45
## [59] irlba_2.3.5.1 rlang_1.1.6
## [61] Rcpp_1.0.12 scuttle_1.12.0
## [63] glue_1.7.0 jsonlite_1.8.8
## [65] R6_2.5.1 zlibbioc_1.48.0