In this tutorial, we are going to download the PBMC data from this publication. This is a collection of immune cell types profiled from the peripheral blood of two human donors across six single-cell RNA-seq technologies (10x Chromium v2 and v3, CEL-seq2, Drop-seq, inDrops, Seq-Well and Smart-seq2).

Load libraries

# SingleCellExperiment: provides the SingleCellExperiment() constructor used below
library(SingleCellExperiment)
# HDF5Array: provides saveHDF5SummarizedExperiment(), used to write the object to disk
library(HDF5Array)
# GEDI: NOTE(review): no GEDI function is called directly in this tutorial;
# presumably attached so the saved GEDI model object loads correctly -- confirm
library(GEDI)
# SeuratData: provides InstallData() and access to the pbmcsca dataset
library(SeuratData)
# Fix the random seed. NOTE(review): no random step is visible in this tutorial
set.seed(43)

Downloading data

The raw count matrices and metadata are available through the SeuratData package:

# Fetch and install the pbmcsca dataset package through SeuratData
SeuratData::InstallData("pbmcsca")

We will generate a SingleCellExperiment (SCE) object.

# Bring the pbmcsca Seurat object into the workspace
data("pbmcsca")

# Per-cell metadata; copy the cell names (row names) into a Barcode column
meta <- pbmcsca[[]]
meta$Barcode <- rownames(meta)

# Assemble the SCE from the raw count matrix and the per-cell metadata
sce <- SingleCellExperiment(
  assays = list(
    counts = Seurat::GetAssayData(object = pbmcsca, slot = "counts")
  ),
  colData = meta
)
sce
## class: SingleCellExperiment 
## dim: 33694 31021 
## metadata(0):
## assays(1): counts
## rownames(33694): TSPAN6 TNMD ... RP11-107E5.4 RP11-299P2.2
## rowData names(0):
## colnames(31021): pbmc1_SM2_Cell_108 pbmc1_SM2_Cell_115 ...
##   pbmc2_inDrops_1_TGAATCCT.TTATGCGA.CATCTCCC
##   pbmc2_inDrops_1_TGAGCACA.GAGCCTTA.CGAGTCTG
## colData names(11): orig.ident nCount_RNA ... Method Barcode
## reducedDimNames(0):
## altExpNames(0):

We will also load the GEDI model for the PBMC data, which can be accessed here.

# Read the saved GEDI model (path is relative to the working directory)
model <- readRDS(file = "pbmc_gedi_model_bothDonors.rds")

We can then subset the SCE to the cell barcodes that were used in the manuscript.

# Number of cell IDs stored with the model
length(model$aux$cellIDs)
## [1] 28035
# Number of cells (columns) in the SCE
ncol(sce)
## [1] 31021
# Number of distinct IDs shared by the model and the SCE column names
length(intersect(model$aux$cellIDs, colnames(sce)))
## [1] 28035
# Keep only the model's cells, in the order the model lists them
sce <- sce[, model$aux$cellIDs]

Save object

The SingleCellExperiment object can then be saved.

# Write the SCE to disk as an HDF5-backed SummarizedExperiment directory.
# Create the parent folder up front so the save does not fail when
# ../data_objects is missing (e.g. on a fresh checkout).
# The `replace` default is left unchanged, so an existing pbmc_SCE directory
# is not overwritten; pass replace = TRUE explicitly to overwrite on a re-run.
out_dir <- "../data_objects/pbmc_SCE"
dir.create(dirname(out_dir), recursive = TRUE, showWarnings = FALSE)
saveHDF5SummarizedExperiment(sce, dir = out_dir)

# Record the R and package versions used to produce this tutorial
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /cvmfs/soft.computecanada.ca/easybuild/software/2020/Core/imkl/2020.1.217/compilers_and_libraries_2020.1.217/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
##  [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8    
##  [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8   
##  [7] LC_PAPER=en_CA.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] pbmcsca.SeuratData_3.0.0     pancreasref.SeuratData_1.0.0
##  [3] panc8.SeuratData_3.0.2       kidneyref.SeuratData_1.0.1  
##  [5] ifnb.SeuratData_3.1.0        hcabm40k.SeuratData_3.0.0   
##  [7] SeuratData_0.2.2             GEDI_0.0.0.9000             
##  [9] HDF5Array_1.18.1             rhdf5_2.34.0                
## [11] DelayedArray_0.16.3          Matrix_1.3-3                
## [13] SingleCellExperiment_1.12.0  SummarizedExperiment_1.20.0 
## [15] Biobase_2.50.0               GenomicRanges_1.42.0        
## [17] GenomeInfoDb_1.26.7          IRanges_2.24.1              
## [19] S4Vectors_0.28.1             BiocGenerics_0.36.0         
## [21] MatrixGenerics_1.2.1         matrixStats_0.58.0          
## 
## loaded via a namespace (and not attached):
##   [1] RcppEigen_0.3.3.9.1    plyr_1.8.6             igraph_1.3.4          
##   [4] lazyeval_0.2.2         sp_1.4-5               splines_4.0.0         
##   [7] listenv_0.8.0          scattermore_0.7        ggplot2_3.4.2         
##  [10] digest_0.6.33          htmltools_0.5.6        fansi_1.0.4           
##  [13] magrittr_2.0.3         tensor_1.5             cluster_2.1.0         
##  [16] ROCR_1.0-11            globals_0.16.2         spatstat.sparse_2.0-0 
##  [19] colorspace_2.0-0       rappdirs_0.3.3         ggrepel_0.9.2         
##  [22] xfun_0.22              dplyr_1.1.1            crayon_1.5.2          
##  [25] RCurl_1.98-1.3         jsonlite_1.8.7         progressr_0.10.1      
##  [28] spatstat.data_2.1-0    survival_3.2-11        zoo_1.8-9             
##  [31] glue_1.6.2             polyclip_1.10-0        gtable_0.3.0          
##  [34] zlibbioc_1.36.0        XVector_0.30.0         leiden_0.3.8          
##  [37] Rhdf5lib_1.12.1        future.apply_1.7.0     abind_1.4-5           
##  [40] scales_1.2.1           DBI_1.1.1              miniUI_0.1.1.1        
##  [43] Rcpp_1.0.8.3           viridisLite_0.4.2      xtable_1.8-4          
##  [46] reticulate_1.18        spatstat.core_2.1-2    htmlwidgets_1.5.3     
##  [49] httr_1.4.5             RColorBrewer_1.1-2     ellipsis_0.3.2        
##  [52] Seurat_4.1.1           ica_1.0-2              pkgconfig_2.0.3       
##  [55] sass_0.4.7             uwot_0.1.10            deldir_0.2-10         
##  [58] utf8_1.2.3             tidyselect_1.2.0       rlang_1.1.1           
##  [61] reshape2_1.4.4         later_1.1.0.1          munsell_0.5.0         
##  [64] tools_4.0.0            cachem_1.0.4           cli_3.6.1             
##  [67] generics_0.1.3         ggridges_0.5.3         evaluate_0.22         
##  [70] stringr_1.5.0          fastmap_1.1.1          yaml_2.2.1            
##  [73] goftest_1.2-2          knitr_1.31             fitdistrplus_1.1-3    
##  [76] purrr_1.0.1            RANN_2.6.1             pbapply_1.4-3         
##  [79] future_1.33.0          nlme_3.1-147           mime_0.10             
##  [82] compiler_4.0.0         plotly_4.9.3           png_0.1-7             
##  [85] spatstat.utils_2.1-0   tibble_3.2.1           bslib_0.5.1           
##  [88] stringi_1.5.3          rgeos_0.5-9            lattice_0.20-41       
##  [91] vctrs_0.6.1            pillar_1.9.0           lifecycle_1.0.3       
##  [94] rhdf5filters_1.2.0     spatstat.geom_2.1-0    lmtest_0.9-38         
##  [97] jquerylib_0.1.3        RcppAnnoy_0.0.18       data.table_1.14.0     
## [100] cowplot_1.1.1          bitops_1.0-6           irlba_2.3.3           
## [103] httpuv_1.5.5           patchwork_1.1.1        R6_2.5.0              
## [106] promises_1.2.0.1       KernSmooth_2.23-16     gridExtra_2.3         
## [109] parallelly_1.36.0      codetools_0.2-16       MASS_7.3-51.5         
## [112] SeuratObject_4.1.0     sctransform_0.3.3      GenomeInfoDbData_1.2.4
## [115] mgcv_1.8-31            grid_4.0.0             rpart_4.1-15          
## [118] tidyr_1.3.0            rmarkdown_2.7          Rtsne_0.15            
## [121] shiny_1.6.0