library(ggplot2)
library(Matrix)
library(pheatmap)
library(ComplexHeatmap)
library(circlize)
library(SingleCellExperiment)
library(scConvexNMF)
library(survminer)
library(survival)
library(pROC)
#' Function to return object for ggplot2 plotting
#' @param size_text Size of the text to use for plotting
#' @param leg_pos Position of legend
#' @return ggplot theme object
theme_plot <- function(size_text=15,
leg_pos="right",
size_text_titles=NULL,
classic=FALSE,
line_size=0.85,
line_size_color="black",
axis_ticks_length=0.25,
axis_ticks_color="black",
rotate_x_axis=TRUE
){
require(ggplot2)
if( is.null(size_text_titles) ){
size_text_titles<- size_text + 2
}
if( classic ){
gg.plots<- theme_classic()
}else{
gg.plots<- theme_bw()
}
gg.plots <- gg.plots +
theme(axis.line=element_line(linewidth=line_size, colour=line_size_color),
axis.ticks=element_line(colour=axis_ticks_color, linewidth=line_size),
axis.ticks.length=unit(axis_ticks_length, "cm"),
axis.title.x=element_text(size=size_text_titles),
legend.position=leg_pos,
plot.title=element_text(hjust=0.5, size=size_text),
axis.text.y=element_text(size=size_text, color="black"),
axis.title.y=element_text(size=size_text_titles) )
if( rotate_x_axis) {
gg.plots<- gg.plots +
theme(axis.text.x=element_text(size=size_text, angle=45, hjust=1, color=line_size_color) )
}else{
gg.plots<- gg.plots +
theme(axis.text.x = element_text(size=size_text, angle = 90, vjust = 0.5, hjust=1, color=line_size_color))
}
}
The gene expression data can be found at the GEO GSE328575.
# Reading SCE object
sce<- readRDS("sce.rds")
meta<- data.frame(colData(sce))
Read metastatic signature scores, wwhich can be found at Zenodo.
met_scores<- readRDS("RCC_metastasis_score.rds")
dim(met_scores)
## [1] 40194 1
length(intersect(rownames(met_scores), rownames(meta)))
## [1] 40194
meta<- meta[rownames(met_scores),]
meta<- cbind(meta, met_scores)
Read TCGA data
# Reading SCE object
lis_tcga<- readRDS("lis_tcga.rds")
meta_tc<- lis_tcga$meta
print( plot.roc( meta$tumor_site ~ meta$Metastasis_score), print.auc=T)
## Setting levels: control = metastasis, case = primary
## Setting direction: controls > cases
##
## Call:
## plot.roc.formula(x = meta$tumor_site ~ meta$Metastasis_score)
##
## Data: (unknown) in 27440 controls (meta$tumor_site ~ meta$Metastasis_score metastasis) > 12754 cases (meta$tumor_site ~ meta$Metastasis_score primary).
## Area under the curve: 0.9118
vec_median_sample<- tapply(meta$Metastasis_score, meta$Sample, median)
# Ordering samples
sm_df<- unique(meta[,c("Sample", "tumor_site")])
sm_df$Median_prob<- vec_median_sample[sm_df$Sample]
sm_df$tumor_site<- factor(sm_df$tumor_site, levels=c("primary", "metastasis"))
rownames(sm_df)<- sm_df$Sample
df_class<- unique(meta[,c("Sample", "class")])
rownames(df_class)<- df_class$Sample
dim(sm_df)
## [1] 14 3
dim(df_class)
## [1] 14 2
length(intersect(rownames(df_class), rownames(sm_df)))
## [1] 14
sm_df$Class<- df_class[rownames(sm_df),"class"]
sm_df$Class<- factor(sm_df$Class, levels=c("patient", "PDX") )
sm_df<- sm_df[order(sm_df$Class, sm_df$tumor_site, sm_df$Median_prob ),]
meta$Sample<- factor(meta$Sample, levels=sm_df$Sample )
ggplot( meta, aes( x = Sample, y= Metastasis_score, fill=tumor_site ) ) +
geom_violin(scale="width", trim=T)+
geom_boxplot(width=0.1, outlier.shape=NA) +
stat_summary(fun = "mean",
geom = "point",
color = "red", shape=17) +
theme_plot(classic=T)
## Filter for ccRCC
cancer_type<- "ccRCC"
meta_ccrcc<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]
# Remove samples for which we don't have purity estimates (3 amples)
meta_ccrcc<- meta_ccrcc[!is.na(meta_ccrcc$purity),]
n_bins<- 20
n_space<- 1/n_bins
vec_quant<- seq(0,1, n_space)
min_prop<- 0.1
max_prop<- 1-min_prop
vec_quant<- vec_quant[vec_quant > min_prop]
vec_quant<- vec_quant[vec_quant < max_prop]
vec_pvals<- c()
vec_coefs<- c()
# Test
for( quantile_use in vec_quant ){
meta_use<- meta_ccrcc
meta_use[,"metastatic.class"]<- ifelse(meta_use[,"full_metastatic_score"] > quantile(meta_use[,"full_metastatic_score"], quantile_use), "high", "low")
meta_use[,"metastatic.class"]<- factor(meta_use[,"metastatic.class"], levels=c("low", "high"))
model <- coxph(Surv(OS.time, OS) ~ metastatic.class + purity + Gender, data = meta_use)
vec_pvals<- c(vec_pvals, summary(model)$coefficients["metastatic.classhigh","Pr(>|z|)"])
vec_coefs<- c(vec_coefs, summary(model)$coefficients["metastatic.classhigh","coef"])
}
vec_padj<- p.adjust(vec_pvals, method="fdr")
index_top<- which(vec_padj == min(vec_padj) )
# Print results
vec_padj[index_top]
## [1] 7.863086e-13
vec_coefs[index_top]
## [1] 1.250591
quantile(meta_use$full_metastatic_score, vec_quant[index_top])
## 80%
## 0.9999916
# Plot
quantile_use<- vec_quant[index_top]
meta_use<- meta_ccrcc
meta_use[,"metastatic.class"]<- ifelse(meta_use[,"full_metastatic_score"] > quantile(meta_use[,"full_metastatic_score"], quantile_use), "high", "low")
meta_use[,"metastatic.class"]<- factor(meta_use[,"metastatic.class"], levels=c("high", "low"))
fit <- survfit(Surv(OS.time, OS) ~ metastatic.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = TRUE, conf.int = TRUE, pval=TRUE)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: AlmaLinux 9.6 (Sage Margay)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS IMKL; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
## [5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
## [7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Canada/Eastern
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] pROC_1.18.5 survival_3.5-7
## [3] survminer_0.5.0 ggpubr_0.6.0
## [5] scConvexNMF_0.0.0.9000 SingleCellExperiment_1.24.0
## [7] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [9] GenomicRanges_1.54.1 GenomeInfoDb_1.38.5
## [11] IRanges_2.36.0 S4Vectors_0.40.2
## [13] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [15] matrixStats_1.2.0 circlize_0.4.15
## [17] ComplexHeatmap_2.18.0 pheatmap_1.0.12
## [19] Matrix_1.6-4 ggplot2_3.5.2
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 gridExtra_2.3 rlang_1.1.6
## [4] magrittr_2.0.3 clue_0.3-65 GetoptLong_1.0.5
## [7] compiler_4.3.1 png_0.1-8 vctrs_0.6.5
## [10] stringr_1.5.1 pkgconfig_2.0.3 shape_1.4.6
## [13] crayon_1.5.2 fastmap_1.1.1 backports_1.4.1
## [16] XVector_0.42.0 labeling_0.4.3 KMsurv_0.1-6
## [19] utf8_1.2.4 rmarkdown_2.29 markdown_1.12
## [22] purrr_1.0.2 xfun_0.41 zlibbioc_1.48.0
## [25] cachem_1.0.8 jsonlite_1.8.8 highr_0.10
## [28] DelayedArray_0.28.0 broom_1.0.5 parallel_4.3.1
## [31] cluster_2.1.6 R6_2.5.1 stringi_1.8.3
## [34] bslib_0.6.1 RColorBrewer_1.1-3 car_3.1-2
## [37] jquerylib_0.1.4 Rcpp_1.0.12 iterators_1.0.14
## [40] knitr_1.45 zoo_1.8-12 splines_4.3.1
## [43] tidyselect_1.2.0 abind_1.4-5 yaml_2.3.8
## [46] doParallel_1.0.17 ggtext_0.1.2 codetools_0.2-19
## [49] lattice_0.22-5 tibble_3.2.1 plyr_1.8.9
## [52] withr_2.5.2 evaluate_0.23 xml2_1.3.6
## [55] survMisc_0.5.6 pillar_1.9.0 carData_3.0-5
## [58] foreach_1.5.2 generics_0.1.3 RCurl_1.98-1.14
## [61] commonmark_1.9.0 munsell_0.5.0 scales_1.3.0
## [64] xtable_1.8-4 glue_1.7.0 tools_4.3.1
## [67] data.table_1.14.10 ggsignif_0.6.4 tidyr_1.3.0
## [70] colorspace_2.1-0 GenomeInfoDbData_1.2.11 cli_3.6.2
## [73] km.ci_0.5-6 fansi_1.0.6 S4Arrays_1.2.0
## [76] dplyr_1.1.4 gtable_0.3.4 rstatix_0.7.2
## [79] sass_0.4.8 digest_0.6.33 SparseArray_1.2.3
## [82] rjson_0.2.21 farver_2.1.1 htmltools_0.5.7
## [85] lifecycle_1.0.4 GlobalOptions_0.1.2 gridtext_0.1.5