Mark genes
get_hvg <- function(sce10x_subset, bio_thresh) {
dec <-
modelGeneVar(sce10x_subset)
top_dec <- dec[order(dec$bio, decreasing = TRUE), ]
top_dec <- top_dec %>%
as.data.frame %>%
rownames_to_column(., var = "gene_name") %>%
as_tibble() %>%
mutate(
top_hvg = if_else(bio > bio_thresh, TRUE, FALSE),
trend = metadata(dec)$trend(mean)
) %>%
select(gene_name, top_hvg, bio, mean, total, trend)
return(top_dec)
}
mark_retained_genes <-
function(sce10x_subset,
detection_limit = 3,
min_cells = 3,
bio_thresh = 0.01) {
dt <-
tibble(
gene_name = rownames(assay(sce10x_subset, "spliced")),
s = rowSums(assay(sce10x_subset, "spliced")),
u = rowSums(assay(sce10x_subset, "unspliced")),
t = rowSums(
!(
assay(sce10x_subset, "spliced") < detection_limit &
assay(sce10x_subset, "unspliced") < detection_limit
) &
(
assay(sce10x_subset, "spliced") > 0 &
assay(sce10x_subset, "unspliced") > 0
)
)
) %>%
mutate(
nonzero = case_when(
s == 0 & u != 0 ~ "s_zero",
u == 0 & s != 0 ~ "u_zero",
s == 0 & s == 0 ~ "neither",
s != 0 & s != 0 ~ "both"
) ,
expressed = (t >= min_cells),
keep = expressed & nonzero == "both",
expressed = NULL,
s = NULL,
u = NULL,
t = NULL
)
top_dec <- get_hvg(sce10x_subset, bio_thresh)
dt <-
left_join(dt,
top_dec,
by = "gene_name")
rowData(sce10x_subset) <-
left_join(
rowData(sce10x_subset) %>%
as_tibble() %>%
select(-nexprs_l1_u, -nexprs_l1_s,-keep),
dt,
by = "gene_name"
) %>%
column_to_rownames(var = "gene_name") %>%
DataFrame(.)
return(sce10x_subset)
}
sce10x_stem <- mark_retained_genes(sce10x_stem)
sce10x_fap <- mark_retained_genes(sce10x_fap)
sce10x_macrophage <- mark_retained_genes(sce10x_macrophage)
rowData(sce10x_stem) %>%
as_tibble() %>%
select(nonzero, keep, top_hvg) %>%
count(keep, top_hvg, nonzero)
rowData(sce10x_fap) %>%
as_tibble() %>%
select(nonzero, keep, top_hvg) %>%
count(keep, top_hvg, nonzero)
rowData(sce10x_macrophage) %>%
as_tibble() %>%
select(nonzero, keep, top_hvg) %>%
count(keep, top_hvg, nonzero)
ggplot(rowData(sce10x_stem) %>%
as_tibble()) +
geom_point(aes(mean,
total,
color=top_hvg)) +
geom_line(aes(mean,
trend),
colour="black")
ggplot(rowData(sce10x_fap) %>%
as_tibble()) +
geom_point(aes(mean,
total,
color=top_hvg)) +
geom_line(aes(mean,
trend),
colour="black")
ggplot(rowData(sce10x_macrophage) %>%
as_tibble()) +
geom_point(aes(mean,
total,
color=top_hvg)) +
geom_line(aes(mean,
trend),
colour="black")
Make common rowData table
stem_meta <- rowData(sce10x_stem)[,6:8]
fap_meta <-rowData(sce10x_fap)[,6:8]
macrophage_meta <- rowData(sce10x_macrophage)[,6:8]
col_names <- colnames(stem_meta)
colnames(stem_meta) <- paste0(col_names, "_stem" )
colnames(fap_meta) <- paste0(col_names, "_fap" )
colnames(macrophage_meta) <- paste0(col_names, "_macrophage" )
row_data_common<-
cbind(rowData(sce10x_stem)[,1:5],
stem_meta,
fap_meta,
macrophage_meta)
rowData(sce10x_stem) <- row_data_common
rowData(sce10x_fap) <- row_data_common
rowData(sce10x_macrophage) <- row_data_common
Combine and save
sce10x <-
cbind(sce10x_stem,
sce10x_fap,
sce10x_macrophage)
saveRDS(
sce10x,
file.path(
data_dir,
"preprocessed",
"sce10x_filtered_final.rds"
)
)
sessionInfo()
R version 4.0.1 (2020-06-06)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] LoomExperiment_1.6.0 rtracklayer_1.48.0 rhdf5_2.32.1 forcats_0.5.0
[5] stringr_1.4.0 dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
[9] tidyr_1.1.0 tibble_3.0.1 tidyverse_1.3.0 scran_1.16.0
[13] scater_1.16.1 ggplot2_3.3.2 SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.1
[17] DelayedArray_0.14.0 matrixStats_0.56.0 Biobase_2.48.0 GenomicRanges_1.40.0
[21] GenomeInfoDb_1.24.2 IRanges_2.22.2 S4Vectors_0.26.1 BiocGenerics_0.34.0
loaded via a namespace (and not attached):
[1] nlme_3.1-147 fs_1.4.1 bitops_1.0-6 lubridate_1.7.9
[5] httr_1.4.1 rprojroot_1.3-2 tools_4.0.1 backports_1.1.8
[9] R6_2.4.1 irlba_2.3.3 HDF5Array_1.16.1 vipor_0.4.5
[13] DBI_1.1.0 colorspace_1.4-1 withr_2.2.0 tidyselect_1.1.0
[17] gridExtra_2.3 compiler_4.0.1 cli_2.0.2 rvest_0.3.5
[21] BiocNeighbors_1.6.0 xml2_1.3.2 labeling_0.3 scales_1.1.1
[25] digest_0.6.25 Rsamtools_2.4.0 rmarkdown_2.3 XVector_0.28.0
[29] base64enc_0.1-3 pkgconfig_2.0.3 htmltools_0.5.0 dbplyr_1.4.4
[33] limma_3.44.3 readxl_1.3.1 rlang_0.4.6 rstudioapi_0.11
[37] DelayedMatrixStats_1.10.0 farver_2.0.3 generics_0.0.2 jsonlite_1.6.1
[41] BiocParallel_1.22.0 RCurl_1.98-1.2 magrittr_1.5 BiocSingular_1.4.0
[45] GenomeInfoDbData_1.2.3 Matrix_1.2-18 fansi_0.4.1 Rcpp_1.0.4.6
[49] ggbeeswarm_0.6.0 munsell_0.5.0 Rhdf5lib_1.10.0 viridis_0.5.1
[53] lifecycle_0.2.0 stringi_1.4.6 yaml_2.2.1 edgeR_3.30.3
[57] zlibbioc_1.34.0 grid_4.0.1 blob_1.2.1 dqrng_0.2.1
[61] crayon_1.3.4 lattice_0.20-41 Biostrings_2.56.0 haven_2.3.1
[65] hms_0.5.3 locfit_1.5-9.4 knitr_1.29 pillar_1.4.4
[69] igraph_1.2.5 reprex_0.3.0 XML_3.99-0.3 glue_1.4.1
[73] evaluate_0.14 modelr_0.1.8 vctrs_0.3.1 cellranger_1.1.0
[77] gtable_0.3.0 assertthat_0.2.1 xfun_0.15 rsvd_1.0.3
[81] broom_0.5.6 viridisLite_0.3.0 GenomicAlignments_1.24.0 beeswarm_0.2.3
[85] statmod_1.4.34 ellipsis_0.3.1
LS0tCnRpdGxlOiAiTW91c2UgTXVzY2xlIFN0ZW0gQ2VsbCBQcm9qZWN0ICIKc3VidGl0bGU6ICJQYXJ0IDQ6IGZlYXR1cmUgc2VsZWN0aW9uIgphdXRob3I6IAotIG5hbWU6IFJpY2sgRmFyb3VuaQogIGFmZmlsaWF0aW9uOgogIC0gJmNydWsgR8Opbm9tZSBRdcOpYmVjIElubm92YXRpb24gQ2VudHJlLCBNY0dpbGwgVW5pdmVyc2l0eSwgTW9udHJlYWwsIENhbmFkYQpkYXRlOiAnYHIgZm9ybWF0KFN5cy5EYXRlKCksICIlWS0lQi0lZCIpYCcKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6CiAgICBkZl9wcmludDogcGFnZWQKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgdG9jOiBubwogICAgdG9jX2Zsb2F0OiAKICAgICAgY29sbGFwc2VkOiBmYWxzZQogICAgICBzbW9vdGhfc2Nyb2xsOiBmYWxzZQotLS0KCgoKIyBQcmVwYXJlIGFuYWx5c2lzIHdvcmtmbG93CgojIyBTZXQgZmlsZXBhdGhzIGFuZCBwYXJhbWV0ZXJzCgpgYGB7ciBzZXR1cH0Kc2V0LnNlZWQoNDIpCmtuaXRyOjpvcHRzX2tuaXQkc2V0KHJvb3QuZGlyID0gcnByb2pyb290OjpmaW5kX3JzdHVkaW9fcm9vdF9maWxlKCkpCm9wdGlvbnMoCiAgcmVhZHIuc2hvd19wcm9ncmVzcyA9IEZBTFNFLAogIGRpZ2l0cyA9IDIKKQpgYGAKCiMjIExvYWQgcGFja2FnZXMKYGBge3J9CnN1cHByZXNzUGFja2FnZVN0YXJ0dXBNZXNzYWdlcyh7CiAgbGlicmFyeShzY2F0ZXIpCiAgbGlicmFyeShzY3JhbikKICBsaWJyYXJ5KHRpZHl2ZXJzZSkKfSkKYGBgCgojIyBEZWZpbmUgZmlsZSBwYXRocwoKYGBge3J9CmRhdGFfZGlyIDwtICIuL2RhdGEiCmZpZ3VyZXNfZGlyIDwtIGZpbGUucGF0aCgiLi9maWd1cmVzIikKYGBgCgoKYGBge3J9CnNjZTEweCA8LQogIHJlYWRSRFMoZmlsZS5wYXRoKAogICAgZGF0YV9kaXIsCiAgICAicHJlcHJvY2Vzc2VkIiwKICAgICJzY2UxMHhfZmlsdGVyZWRfZmluYWwucmRzIgogICkpCmBgYAoKCgojIFN1YnNldCBkYXRhCgoKYGBge3J9CnNjZTEweF9zdGVtIDwtIHNjZTEweFssY29sRGF0YShzY2UxMHgpJGNsdXN0ZXJzX2xldmVsMSA9PSJzdGVtIl0Kc2NlMTB4X2ZhcCA8LSBzY2UxMHhbLGNvbERhdGEoc2NlMTB4KSRjbHVzdGVyc19sZXZlbDEgPT0iZmFwIl0Kc2NlMTB4X21hY3JvcGhhZ2UgPC0gc2NlMTB4Wyxjb2xEYXRhKHNjZTEweCkkY2x1c3RlcnNfbGV2ZWwxID09Im1hY3JvcGhhZ2UiXQpgYGAKCgojIE1hcmsgZ2VuZXMKCmBgYHtyfQpnZXRfaHZnIDwtIGZ1bmN0aW9uKHNjZTEweF9zdWJzZXQsIGJpb190aHJlc2gpIHsKICBkZWMgPC0KICAgIG1vZGVsR2VuZVZhcihzY2UxMHhfc3Vic2V0KQogIAogIHRvcF9kZWMgPC0gZGVjW29yZGVyKGRlYyRiaW8sIGRlY3JlYXNpbmcgPSBUUlVFKSwgXQogIHRvcF9kZWMgPC0gdG9wX2RlYyAlPiUKICAgIGFzLmRhdGEuZnJhbWUgJT4lCiAgICByb3duYW1lc190b19jb2x1bW4oLiwgdmFyID0gImdlbmVfbmFtZSIpICU+JQogICAgYXNfdGliYmxlKCkgJT4lCiAgICBtdXRhdGUoCiAgICAgIHRvcF9odmcgPSBpZl9lbHNlKGJpbyA+IGJpb190aHJlc2gsIFRSVUUsIEZBTFNFKSwKICAgICAgdHJlbmQgPSBtZXRhZGF0YShkZWMpJHRyZW5kKG1lYW4pCiAgICApICU+JQogICAgc2VsZWN0KGdlbmVfbmFtZSwgdG9wX2h2ZywgYmlvLCBtZWFuLCB0b3RhbCwgdHJlbmQpCiAgCiAgcmV0dXJuKHRvcF9kZWMpCn0KCm1hcmtfcmV0YWluZWRfZ2VuZXMgPC0KICBmdW5jdGlvbihzY2UxMHhfc3Vic2V0LAogICAgICAgICAgIGRldGVjdGlvbl9saW1pdCA9IDMsCiAgICAgICAgICAgbWluX2NlbGxzID0gMywKICAgICAgICAgICBiaW9fdGhyZXNoID0gMC4wMSkgewogICAgZHQgPC0KICAgICAgdGliYmxlKAogICAgICAgIGdlbmVfbmFtZSA9IHJvd25hbWVzKGFzc2F5KHNjZTEweF9zdWJzZXQsICJzcGxpY2VkIikpLAogICAgICAgIHMgPSByb3dTdW1zKGFzc2F5KHNjZTEweF9zdWJzZXQsICJzcGxpY2VkIikpLAogICAgICAgIHUgPSByb3dTdW1zKGFzc2F5KHNjZTEweF9zdWJzZXQsICJ1bnNwbGljZWQiKSksCiAgICAgICAgdCA9IHJvd1N1bXMoCiAgICAgICAgICAhKAogICAgICAgICAgICBhc3NheShzY2UxMHhfc3Vic2V0LCAic3BsaWNlZCIpIDwgZGV0ZWN0aW9uX2xpbWl0ICYKICAgICAgICAgICAgICBhc3NheShzY2UxMHhfc3Vic2V0LCAidW5zcGxpY2VkIikgPCBkZXRlY3Rpb25fbGltaXQKICAgICAgICAgICkgJgogICAgICAgICAgICAoCiAgICAgICAgICAgICAgYXNzYXkoc2NlMTB4X3N1YnNldCwgInNwbGljZWQiKSA+IDAgJgogICAgICAgICAgICAgICAgYXNzYXkoc2NlMTB4X3N1YnNldCwgInVuc3BsaWNlZCIpID4gMAogICAgICAgICAgICApCiAgICAgICAgKQogICAgICApICU+JQogICAgICBtdXRhdGUoCiAgICAgICAgbm9uemVybyA9IGNhc2Vfd2hlbigKICAgICAgICAgIHMgPT0gMCAmIHUgIT0gMCB+ICJzX3plcm8iLAogICAgICAgICAgdSA9PSAwICYgcyAhPSAwIH4gInVfemVybyIsCiAgICAgICAgICBzID09IDAgJiBzID09IDAgfiAibmVpdGhlciIsCiAgICAgICAgICBzICE9IDAgJiBzICE9IDAgfiAiYm90aCIKICAgICAgICApICwKICAgICAgICBleHByZXNzZWQgPSAodCA+PSBtaW5fY2VsbHMpLAogICAgICAgIGtlZXAgPSBleHByZXNzZWQgJiBub256ZXJvID09ICJib3RoIiwKICAgICAgICBleHByZXNzZWQgPSBOVUxMLAogICAgICAgIHMgPSBOVUxMLAogICAgICAgIHUgPSBOVUxMLAogICAgICAgIHQgPSBOVUxMCiAgICAgICkKICAgIAogICAgdG9wX2RlYyA8LSBnZXRfaHZnKHNjZTEweF9zdWJzZXQsIGJpb190aHJlc2gpCiAgICAKICAgIGR0IDwtCiAgICAgIGxlZnRfam9pbihkdCwKICAgICAgICAgICAgICAgIHRvcF9kZWMsCiAgICAgICAgICAgICAgICBieSA9ICJnZW5lX25hbWUiKQogICAgCiAgICByb3dEYXRhKHNjZTEweF9zdWJzZXQpIDwtCiAgICAgIGxlZnRfam9pbigKICAgICAgICByb3dEYXRhKHNjZTEweF9zdWJzZXQpICU+JQogICAgICAgICAgYXNfdGliYmxlKCkgJT4lCiAgICAgICAgICBzZWxlY3QoLW5leHByc19sMV91LCAtbmV4cHJzX2wxX3MsLWtlZXApLAogICAgICAgIGR0LAogICAgICAgIGJ5ID0gImdlbmVfbmFtZSIKICAgICAgKSAlPiUKICAgICAgY29sdW1uX3RvX3Jvd25hbWVzKHZhciA9ICJnZW5lX25hbWUiKSAlPiUKICAgICAgRGF0YUZyYW1lKC4pCiAgICAKICAgIHJldHVybihzY2UxMHhfc3Vic2V0KQogIH0KYGBgCgoKCmBgYHtyfQpzY2UxMHhfc3RlbSA8LSBtYXJrX3JldGFpbmVkX2dlbmVzKHNjZTEweF9zdGVtKQpzY2UxMHhfZmFwIDwtIG1hcmtfcmV0YWluZWRfZ2VuZXMoc2NlMTB4X2ZhcCkKc2NlMTB4X21hY3JvcGhhZ2UgPC0gbWFya19yZXRhaW5lZF9nZW5lcyhzY2UxMHhfbWFjcm9waGFnZSkKYGBgCgoKCmBgYHtyfQpyb3dEYXRhKHNjZTEweF9zdGVtKSAlPiUgCiAgYXNfdGliYmxlKCkgJT4lCiAgc2VsZWN0KG5vbnplcm8sIGtlZXAsIHRvcF9odmcpICU+JQogIGNvdW50KGtlZXAsIHRvcF9odmcsIG5vbnplcm8pCmBgYAoKYGBge3J9CnJvd0RhdGEoc2NlMTB4X2ZhcCkgJT4lIAogIGFzX3RpYmJsZSgpICU+JQogIHNlbGVjdChub256ZXJvLCBrZWVwLCB0b3BfaHZnKSAlPiUKICBjb3VudChrZWVwLCB0b3BfaHZnLCBub256ZXJvKQpgYGAKCgpgYGB7cn0Kcm93RGF0YShzY2UxMHhfbWFjcm9waGFnZSkgJT4lIAogIGFzX3RpYmJsZSgpICU+JQogIHNlbGVjdChub256ZXJvLCBrZWVwLCB0b3BfaHZnKSAlPiUKICBjb3VudChrZWVwLCB0b3BfaHZnLCBub256ZXJvKQpgYGAKCgpgYGB7ciBmaWcud2lkdGg9MTJ9CmdncGxvdChyb3dEYXRhKHNjZTEweF9zdGVtKSAlPiUKICAgICAgICAgICAgICBhc190aWJibGUoKSkgKwogIGdlb21fcG9pbnQoYWVzKG1lYW4sCiAgICAgICAgICAgICAgICAgdG90YWwsCiAgICAgICAgICAgICAgICAgY29sb3I9dG9wX2h2ZykpICsKICBnZW9tX2xpbmUoYWVzKG1lYW4sCiAgICAgICAgICAgICAgICB0cmVuZCksIAogICAgICAgICAgICBjb2xvdXI9ImJsYWNrIikgCmBgYAoKCgpgYGB7ciBmaWcud2lkdGg9MTJ9CmdncGxvdChyb3dEYXRhKHNjZTEweF9mYXApICU+JQogICAgICAgICAgICAgIGFzX3RpYmJsZSgpKSArCiAgZ2VvbV9wb2ludChhZXMobWVhbiwKICAgICAgICAgICAgICAgICB0b3RhbCwKICAgICAgICAgICAgICAgICBjb2xvcj10b3BfaHZnKSkgKwogIGdlb21fbGluZShhZXMobWVhbiwKICAgICAgICAgICAgICAgIHRyZW5kKSwgCiAgICAgICAgICAgIGNvbG91cj0iYmxhY2siKSAKYGBgCgoKYGBge3IgZmlnLndpZHRoPTEyfQpnZ3Bsb3Qocm93RGF0YShzY2UxMHhfbWFjcm9waGFnZSkgJT4lCiAgICAgICAgICAgICAgYXNfdGliYmxlKCkpICsKICBnZW9tX3BvaW50KGFlcyhtZWFuLAogICAgICAgICAgICAgICAgIHRvdGFsLAogICAgICAgICAgICAgICAgIGNvbG9yPXRvcF9odmcpKSArCiAgZ2VvbV9saW5lKGFlcyhtZWFuLAogICAgICAgICAgICAgICAgdHJlbmQpLCAKICAgICAgICAgICAgY29sb3VyPSJibGFjayIpIApgYGAKCgojIyBNYWtlIGNvbW1vbiByb3dEYXRhIHRhYmxlCgpgYGB7cn0Kc3RlbV9tZXRhIDwtIHJvd0RhdGEoc2NlMTB4X3N0ZW0pWyw2OjhdCmZhcF9tZXRhIDwtcm93RGF0YShzY2UxMHhfZmFwKVssNjo4XQptYWNyb3BoYWdlX21ldGEgPC0gcm93RGF0YShzY2UxMHhfbWFjcm9waGFnZSlbLDY6OF0KY29sX25hbWVzIDwtIGNvbG5hbWVzKHN0ZW1fbWV0YSkgCmNvbG5hbWVzKHN0ZW1fbWV0YSkgPC0gcGFzdGUwKGNvbF9uYW1lcywgIl9zdGVtIiApCmNvbG5hbWVzKGZhcF9tZXRhKSA8LSBwYXN0ZTAoY29sX25hbWVzLCAiX2ZhcCIgKQpjb2xuYW1lcyhtYWNyb3BoYWdlX21ldGEpIDwtIHBhc3RlMChjb2xfbmFtZXMsICJfbWFjcm9waGFnZSIgKQoKcm93X2RhdGFfY29tbW9uPC0gCiAgY2JpbmQocm93RGF0YShzY2UxMHhfc3RlbSlbLDE6NV0sCiAgICAgICAgc3RlbV9tZXRhLCAKICAgICAgICBmYXBfbWV0YSwKICAgICAgICBtYWNyb3BoYWdlX21ldGEpCgpyb3dEYXRhKHNjZTEweF9zdGVtKSA8LSByb3dfZGF0YV9jb21tb24Kcm93RGF0YShzY2UxMHhfZmFwKSA8LSAgcm93X2RhdGFfY29tbW9uCnJvd0RhdGEoc2NlMTB4X21hY3JvcGhhZ2UpIDwtICByb3dfZGF0YV9jb21tb24KYGBgCgoKIyMgQ29tYmluZSBhbmQgc2F2ZQoKYGBge3J9CnNjZTEweCA8LQogIGNiaW5kKHNjZTEweF9zdGVtLAogICAgICAgIHNjZTEweF9mYXAsCiAgICAgICAgc2NlMTB4X21hY3JvcGhhZ2UpCmBgYAoKCmBgYHtyfQpzYXZlUkRTKAogIHNjZTEweCwKICBmaWxlLnBhdGgoCiAgICBkYXRhX2RpciwKICAgICJwcmVwcm9jZXNzZWQiLAogICAgInNjZTEweF9maWx0ZXJlZF9maW5hbC5yZHMiCiAgKQopCmBgYAoKCmBgYHtyfQpzZXNzaW9uSW5mbygpCmBgYAoKCg==