Loading libraries

library(ggplot2)
library(Matrix)
library(pheatmap)
library(ComplexHeatmap)
library(circlize)
library(SingleCellExperiment)
library(scConvexNMF)
library(survminer)
library(survival)
library(pROC)

Functions

#' Function to return object for ggplot2 plotting
#' @param size_text Size of the text to use for plotting
#' @param leg_pos Position of legend
#' @return ggplot theme object
theme_plot <- function(size_text=15,
                           leg_pos="right",
                           size_text_titles=NULL,
                           classic=FALSE,
                           line_size=0.85,
                           line_size_color="black",
                           axis_ticks_length=0.25,
                           axis_ticks_color="black",
                           rotate_x_axis=TRUE
                           ){
    require(ggplot2)
    if( is.null(size_text_titles) ){
        size_text_titles<- size_text + 2
    }
    if( classic ){
        gg.plots<- theme_classic()
    }else{
        gg.plots<- theme_bw()
    }
    gg.plots <- gg.plots +
        theme(axis.line=element_line(linewidth=line_size, colour=line_size_color),
              axis.ticks=element_line(colour=axis_ticks_color, linewidth=line_size),
              axis.ticks.length=unit(axis_ticks_length, "cm"),
              axis.title.x=element_text(size=size_text_titles),
              legend.position=leg_pos,
              plot.title=element_text(hjust=0.5, size=size_text),
              axis.text.y=element_text(size=size_text, color="black"),
              axis.title.y=element_text(size=size_text_titles) )
    if( rotate_x_axis) {
        gg.plots<- gg.plots +
            theme(axis.text.x=element_text(size=size_text, angle=45, hjust=1, color=line_size_color) )
    }else{
        gg.plots<- gg.plots +
            theme(axis.text.x = element_text(size=size_text, angle = 90, vjust = 0.5, hjust=1, color=line_size_color))
    }
}

Loading data

The gene expression data can be found at the GEO GSE328575.

# Reading SCE object
sce<- readRDS("sce.rds")
meta<- data.frame(colData(sce))

Read metastatic signature scores, wwhich can be found at Zenodo.

met_scores<- readRDS("RCC_metastasis_score.rds")

dim(met_scores)
## [1] 40194     1
length(intersect(rownames(met_scores), rownames(meta)))
## [1] 40194
meta<- meta[rownames(met_scores),]
meta<- cbind(meta, met_scores)

Read TCGA data

# Reading SCE object
lis_tcga<- readRDS("lis_tcga.rds")

meta_tc<- lis_tcga$meta

Violin plot of metastatic scores in RCC data

print( plot.roc( meta$tumor_site ~ meta$Metastasis_score), print.auc=T)
## Setting levels: control = metastasis, case = primary
## Setting direction: controls > cases

## 
## Call:
## plot.roc.formula(x = meta$tumor_site ~ meta$Metastasis_score)
## 
## Data: (unknown) in 27440 controls (meta$tumor_site ~ meta$Metastasis_score metastasis) > 12754 cases (meta$tumor_site ~ meta$Metastasis_score primary).
## Area under the curve: 0.9118
vec_median_sample<- tapply(meta$Metastasis_score, meta$Sample, median)

# Ordering samples
sm_df<- unique(meta[,c("Sample", "tumor_site")])
sm_df$Median_prob<- vec_median_sample[sm_df$Sample]
sm_df$tumor_site<- factor(sm_df$tumor_site, levels=c("primary", "metastasis"))
rownames(sm_df)<- sm_df$Sample

df_class<- unique(meta[,c("Sample", "class")])
rownames(df_class)<- df_class$Sample

dim(sm_df)
## [1] 14  3
dim(df_class)
## [1] 14  2
length(intersect(rownames(df_class), rownames(sm_df)))
## [1] 14
sm_df$Class<- df_class[rownames(sm_df),"class"]
sm_df$Class<- factor(sm_df$Class, levels=c("patient", "PDX") )

sm_df<- sm_df[order(sm_df$Class, sm_df$tumor_site, sm_df$Median_prob ),]

meta$Sample<- factor(meta$Sample, levels=sm_df$Sample )

ggplot( meta, aes( x = Sample, y= Metastasis_score, fill=tumor_site )  ) +
    geom_violin(scale="width", trim=T)+        
    geom_boxplot(width=0.1, outlier.shape=NA) +
    stat_summary(fun = "mean",
                 geom = "point",
                 color = "red", shape=17) +    
    theme_plot(classic=T)

Overall survival

## Filter for ccRCC
cancer_type<- "ccRCC"
meta_ccrcc<- meta_tc[meta_tc$Molecular_clusters == cancer_type,]
# Remove samples for which we don't have purity estimates (3 amples)
meta_ccrcc<- meta_ccrcc[!is.na(meta_ccrcc$purity),]

n_bins<- 20
n_space<- 1/n_bins
vec_quant<- seq(0,1, n_space)
min_prop<- 0.1
max_prop<- 1-min_prop
vec_quant<- vec_quant[vec_quant > min_prop]
vec_quant<- vec_quant[vec_quant < max_prop]
vec_pvals<- c()
vec_coefs<- c()
# Test
for( quantile_use in vec_quant ){
    meta_use<- meta_ccrcc
    meta_use[,"metastatic.class"]<- ifelse(meta_use[,"full_metastatic_score"] > quantile(meta_use[,"full_metastatic_score"], quantile_use), "high", "low")
    meta_use[,"metastatic.class"]<- factor(meta_use[,"metastatic.class"], levels=c("low", "high"))    
    model <- coxph(Surv(OS.time, OS) ~ metastatic.class + purity + Gender, data = meta_use) 
    vec_pvals<- c(vec_pvals, summary(model)$coefficients["metastatic.classhigh","Pr(>|z|)"])
    vec_coefs<- c(vec_coefs, summary(model)$coefficients["metastatic.classhigh","coef"])
}
vec_padj<- p.adjust(vec_pvals, method="fdr")
index_top<- which(vec_padj == min(vec_padj) )
# Print results
vec_padj[index_top]
## [1] 7.863086e-13
vec_coefs[index_top]
## [1] 1.250591
quantile(meta_use$full_metastatic_score, vec_quant[index_top])
##       80% 
## 0.9999916
# Plot
quantile_use<- vec_quant[index_top]
meta_use<- meta_ccrcc
meta_use[,"metastatic.class"]<- ifelse(meta_use[,"full_metastatic_score"] > quantile(meta_use[,"full_metastatic_score"], quantile_use), "high", "low")
meta_use[,"metastatic.class"]<- factor(meta_use[,"metastatic.class"], levels=c("high", "low"))    
fit <- survfit(Surv(OS.time, OS) ~ metastatic.class, data = meta_use)
ggsurvplot(fit, data = meta_use, risk.table = TRUE, conf.int = TRUE, pval=TRUE)

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: AlmaLinux 9.6 (Sage Margay)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS IMKL;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Canada/Eastern
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pROC_1.18.5                 survival_3.5-7             
##  [3] survminer_0.5.0             ggpubr_0.6.0               
##  [5] scConvexNMF_0.0.0.9000      SingleCellExperiment_1.24.0
##  [7] SummarizedExperiment_1.32.0 Biobase_2.62.0             
##  [9] GenomicRanges_1.54.1        GenomeInfoDb_1.38.5        
## [11] IRanges_2.36.0              S4Vectors_0.40.2           
## [13] BiocGenerics_0.48.1         MatrixGenerics_1.14.0      
## [15] matrixStats_1.2.0           circlize_0.4.15            
## [17] ComplexHeatmap_2.18.0       pheatmap_1.0.12            
## [19] Matrix_1.6-4                ggplot2_3.5.2              
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7            gridExtra_2.3           rlang_1.1.6            
##  [4] magrittr_2.0.3          clue_0.3-65             GetoptLong_1.0.5       
##  [7] compiler_4.3.1          png_0.1-8               vctrs_0.6.5            
## [10] stringr_1.5.1           pkgconfig_2.0.3         shape_1.4.6            
## [13] crayon_1.5.2            fastmap_1.1.1           backports_1.4.1        
## [16] XVector_0.42.0          labeling_0.4.3          KMsurv_0.1-6           
## [19] utf8_1.2.4              rmarkdown_2.29          markdown_1.12          
## [22] purrr_1.0.2             xfun_0.41               zlibbioc_1.48.0        
## [25] cachem_1.0.8            jsonlite_1.8.8          highr_0.10             
## [28] DelayedArray_0.28.0     broom_1.0.5             parallel_4.3.1         
## [31] cluster_2.1.6           R6_2.5.1                stringi_1.8.3          
## [34] bslib_0.6.1             RColorBrewer_1.1-3      car_3.1-2              
## [37] jquerylib_0.1.4         Rcpp_1.0.12             iterators_1.0.14       
## [40] knitr_1.45              zoo_1.8-12              splines_4.3.1          
## [43] tidyselect_1.2.0        abind_1.4-5             yaml_2.3.8             
## [46] doParallel_1.0.17       ggtext_0.1.2            codetools_0.2-19       
## [49] lattice_0.22-5          tibble_3.2.1            plyr_1.8.9             
## [52] withr_2.5.2             evaluate_0.23           xml2_1.3.6             
## [55] survMisc_0.5.6          pillar_1.9.0            carData_3.0-5          
## [58] foreach_1.5.2           generics_0.1.3          RCurl_1.98-1.14        
## [61] commonmark_1.9.0        munsell_0.5.0           scales_1.3.0           
## [64] xtable_1.8-4            glue_1.7.0              tools_4.3.1            
## [67] data.table_1.14.10      ggsignif_0.6.4          tidyr_1.3.0            
## [70] colorspace_2.1-0        GenomeInfoDbData_1.2.11 cli_3.6.2              
## [73] km.ci_0.5-6             fansi_1.0.6             S4Arrays_1.2.0         
## [76] dplyr_1.1.4             gtable_0.3.4            rstatix_0.7.2          
## [79] sass_0.4.8              digest_0.6.33           SparseArray_1.2.3      
## [82] rjson_0.2.21            farver_2.1.1            htmltools_0.5.7        
## [85] lifecycle_1.0.4         GlobalOptions_0.1.2     gridtext_0.1.5