knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
options(
readr.show_progress = FALSE,
digits = 2,
scipen = 8,
future.globals.maxSize = +Inf
)
library(tidyverse)
library(tximport)
library(DESeq2)
library(vsn)
library(cowplot)
library(glmpca)
library(apeglm)
library(EnhancedVolcano)
library(pheatmap)
theme_set(theme_bw())
get_deseq_data <- function(target_tissue, meta_dt, data_dir, tx2knownGene) {
meta_dt <-
meta_dt %>%
dplyr::filter(tissue == target_tissue)
samples_dir <- file.path(data_dir,
meta_dt %>%
pull(name))
cdna_dir <- file.path(samples_dir)
cdna_files <- file.path(cdna_dir, "abundance.h5")
names(cdna_files) <-
meta_dt %>%
pull(sample)
txi_scaledTPM <- tximport(cdna_files,
type = "kallisto",
tx2gene=tx2knownGene,
countsFromAbundance = c("scaledTPM"))
txi <- tximport(cdna_files,
type = "kallisto",
tx2gene=tx2knownGene,
countsFromAbundance = c("no"))
scaled_tpm <-
txi_scaledTPM$counts %>%
as_tibble(rownames = "gene_id")
gene_counts <-
txi$counts %>%
as_tibble(rownames = "gene_id")
write_tsv(scaled_tpm, paste0(target_tissue, "_engraftment_scaled_tpm.tsv"))
write_tsv(gene_counts, paste0(target_tissue, "_engraftment_counts.tsv"))
dds <- DESeqDataSetFromTximport(txi,
meta_dt %>%
column_to_rownames("sample"),
design = ~ time + age + time:age)
keep <- rowSums(counts(dds) >= 30) >= 2
dds <- dds[keep,]
return(dds)
}
#We use apeglm shrinkage to preserve the size of large LFC and compute s-values (the estimated rate of false sign). Shrinkage is useful for ranking and visualization, without the need for arbitrary filters on low count genes. https://doi.org/10.1093/bioinformatics/bty895
#The local false sign rate FSR is defined as the posterior probability that the posterior mode (MAP) of beta (log2FC) is of a biologically significant effect size (i.e. abs(beta) > lfcThreshold )
get_lfc_estimates <- function(dds, lfc_threshold = 1, svalue_thresh = 0.05){
niche_lfc <-
lfcShrink(dds,
coef = "timeT21.ageold",
type = "apeglm",
svalue = T,
lfcThreshold = lfc_threshold
)
engrafment_lfc <-
lfcShrink(dds,
coef = "time_T21_vs_T0",
type = "apeglm",
svalue = T,
lfcThreshold = lfc_threshold
)
age_lfc <-
lfcShrink(dds,
coef = "age_old_vs_young",
type = "apeglm",
svalue = T,
lfcThreshold = lfc_threshold
)
plotMA(niche_lfc, ylim = c(-22, 8))
abline(h = c(-1, 1), col = "dodgerblue", lwd = 2)
plotMA(engrafment_lfc, ylim = c(-22, 8))
abline(h = c(-1, 1), col = "dodgerblue", lwd = 2)
plotMA(age_lfc, ylim = c(-22, 8))
abline(h = c(-1, 1), col = "dodgerblue", lwd = 2)
summary(niche_lfc, alpha = svalue_thresh)
summary(engrafment_lfc, alpha = svalue_thresh)
summary(age_lfc, alpha = svalue_thresh)
lfc <-
bind_cols(
mcols(dds)[, c("Intercept",
"time_T21_vs_T0",
"age_old_vs_young",
"timeT21.ageold")] %>%
as_tibble() %>%
set_names(c("intercept", "engraf", "age", "niche")),
engrafment_lfc %>%
as_tibble(rownames = "gene_id") %>%
set_names(c(
c("gene_id", "base_mean"),
paste0(
c("lfc", "lfc_se", "svalue"),
"_engraf"
))),
age_lfc %>%
as_tibble(rownames = "gene_id") %>%
dplyr::select(-baseMean, -gene_id) %>%
set_names(paste0(
c("lfc", "lfc_se", "svalue"),
"_age"
)) ,
niche_lfc %>%
as_tibble(rownames = "gene_id") %>%
dplyr::select(-baseMean, -gene_id) %>%
set_names(paste0(
c("lfc", "lfc_se", "svalue"),
"_niche"))) %>%
mutate(
keep_niche = (abs(lfc_niche) > lfc_threshold &
svalue_niche < svalue_thresh),
keep_age = (abs(lfc_age) > lfc_threshold &
svalue_age < svalue_thresh),
keep_engraf = (abs(lfc_engraf) > lfc_threshold &
svalue_engraf < svalue_thresh)
) %>%
mutate(
EAN = paste0(
as.integer(keep_engraf),
as.integer(keep_age),
as.integer(keep_niche)
))
lfc <-
lfc %>%
dplyr::select(gene_id,EAN, base_mean,
intercept, engraf, age, niche,
lfc_engraf, svalue_engraf, lfc_age, svalue_age,
lfc_niche, svalue_niche, everything()
)
}
plot_volcano <- function(data, xvar, yvar, title, svalue_thresh) {
gv <-
EnhancedVolcano(data,
lab = data %>% pull(gene_name),
xlab = bquote(~ italic(Moderated) ~ Log[2] ~ FC),
ylab = bquote(~ -Log[10] ~ italic(svalue)),
x = xvar,
y = yvar,
col = c("grey30", "forestgreen", "red2", "royalblue"),
pCutoff = svalue_thresh,
legend = c("NS", "Log2FC", "svalue", "svalue & Log2FC"),
legendLabels = c(
"NS",
expression(Log[2] ~ FC),
"svalue",
expression(s - value ~ and ~ log[2] ~ FC)
)
)
ggsave(file.path(figures_dir, paste0(title, "_volcano.pdf")), gv)
return(gv)
}
plot_heatmap <- function(target_genes, title, meta_col, data, show_genes = F) {
df <-
left_join(target_genes %>%
dplyr::select(gene_id, gene_name),
data,
by = "gene_id"
) %>%
dplyr::select(-gene_id) %>%
column_to_rownames("gene_name") %>%
as.matrix()
gh <-
pheatmap(df,
color = colorRampPalette(rev(RColorBrewer::brewer.pal(n = 11, name = "Spectral")))(100),
cluster_rows = T,
show_rownames = show_genes,
treeheight_row = 0,
fontsize_col = 4,
fontsize_row = 6,
angle_col = "45",
cluster_cols = FALSE,
annotation_col = meta_col
)
ggsave(file.path(figures_dir, paste0(title, "_heatmap_expression.pdf")), gh)
return(gh)
}
plot_pca <- function(vsd, title) {
pcaData <-
plotPCA(vsd,
intgroup = c( "time", "age", "tissue", "batch", "donor"),
returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
gg_plot <-
ggplot(pcaData %>%
mutate(condition= paste(age, time, sep="_")),
aes(x = PC1, y = PC2, color = time, shape=age, label = name )) +
geom_point( size=4) +
geom_line(aes(group = donor), colour="grey") +
# coord_fixed() +
geom_text(check_overlap=T, angle = 0, size=3, vjust = 3, nudge_y = 1.5) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
coord_fixed() +
ggtitle("PCA with VST data")
ggsave(file.path(figures_dir, paste0(title, "_pca_bulk_rnaseq.pdf")), gg_plot)
gg_plot
}
plot_gene <- function(gene, data) {
df_plot <-
data %>%
dplyr::filter(gene_id == gene)
gene_name <- df_plot$gene_name[1]
p <- ggplot(
df_plot,
aes(x = time, y = log2_scaled_tpm)) +
geom_point(aes(color = donor)) +
geom_line(aes(group = donor)) +
labs(title = gene_name)+
facet_grid(tissue ~ age)
ggsave(file.path(figures_dir, paste0(gene_name, "_expression_log2_scaled_tpm.pdf")), p)
return(p)
}
figures_dir <- "./figures"
data_dir <- "./data/kallisto_bulk_spliced"
gene_metadata_filepath <- "~/Documents/genome_metadata/data/mm10_ens97_gene_meta.txt"
meta_dt <-
read_tsv(file.path(
data_dir,
"meta.tsv"
)) %>%
# arrange(tissue, age, time) %>%
mutate(
condition = paste(tissue, age, time, sep = "_"),
age = factor(age, levels = c("young", "old")),
tissue = factor(tissue, levels = c("leg", "trunk")),
time = factor(time, levels = c("T0", "T21")),
batch = factor(batch, levels = c(1, 2))
)
Parsed with column specification:
cols(
name = [31mcol_character()[39m,
sample = [31mcol_character()[39m,
time = [31mcol_character()[39m,
tissue = [31mcol_character()[39m,
age = [31mcol_character()[39m,
donor = [31mcol_character()[39m,
type = [31mcol_character()[39m,
batch = [32mcol_double()[39m
)
library(AnnotationHub)
Loading required package: BiocFileCache
Loading required package: dbplyr
Attaching package: ‘dbplyr’
The following objects are masked from ‘package:dplyr’:
ident, sql
Attaching package: ‘AnnotationHub’
The following object is masked from ‘package:Biobase’:
cache
ah <- AnnotationHub()
snapshotDate(): 2019-10-29
edb <- ah[["AH73905"]]
loading from cache
require(“ensembldb”)
seqlevelsStyle(edb) <- "UCSC"
#standard_chroms <- standardChromosomes(edb, species = organism(edb))
tx_meta <-
transcripts(edb,
columns=c("tx_id_version",
"gene_id",
"gene_name",
"gene_id_version"),
return.type="DataFrame")
tx2knownGene <-
tx_meta %>%
as_tibble() %>%
dplyr::select(tx_id_version, gene_id)
gene_metadata <-
read_csv(gene_metadata_filepath) %>%
dplyr::select(-gene_id_version)
Parsed with column specification:
cols(
gene_name = [31mcol_character()[39m,
gene_len = [32mcol_double()[39m,
perc_gene_gc = [32mcol_double()[39m,
gene_id = [31mcol_character()[39m,
seqnames = [31mcol_character()[39m,
gene_biotype = [31mcol_character()[39m,
gene_id_version = [31mcol_character()[39m,
description = [31mcol_character()[39m
)
gene_metadata <-left_join(tx_meta %>% as_tibble() %>%
group_by(gene_id) %>%
dplyr::count( gene_name) %>%
dplyr::select(-n),
gene_metadata %>%
dplyr::select(-gene_name),
by="gene_id") %>%
mutate(gene_name=
scater::uniquifyFeatureNames(
ID= gene_id,
names=gene_name) )
dds_leg <- get_deseq_data("leg", meta_dt, data_dir, tx2knownGene)
1 2 3 4 5 6 7 8 9
summarizing abundance
summarizing counts
summarizing length
1 2 3 4 5 6 7 8 9
summarizing abundance
summarizing counts
summarizing length
using counts and average transcript lengths from tximport
dds_leg
class: DESeqDataSet
dim: 16228 9
metadata(1): version
assays(2): counts avgTxLength
rownames(16228): ENSMUSG00000000001 ENSMUSG00000000028 ... ENSMUSG00000118458 ENSMUSG00000118461
rowData names(0):
colnames(9): yng_leg_t0_1 yng_leg_t0_aug ... old_leg_t21_396_1 old_leg_t21_396_2
colData names(8): name time ... batch condition
dds_trunk <- get_deseq_data("trunk", meta_dt, data_dir, tx2knownGene)
1 2 3 4 5 6 7 8 9 10 11 12
summarizing abundance
summarizing counts
summarizing length
1 2 3 4 5 6 7 8 9 10 11 12
summarizing abundance
summarizing counts
summarizing length
using counts and average transcript lengths from tximport
dds_trunk
class: DESeqDataSet
dim: 15950 12
metadata(1): version
assays(2): counts avgTxLength
rownames(15950): ENSMUSG00000000001 ENSMUSG00000000028 ... ENSMUSG00000118423 ENSMUSG00000118461
rowData names(0):
colnames(12): yng_trunk_t0_2 yng_trunk_t0_3 ... old_trunk_t21_94A_br1 old_trunk_t21_94A_br2
colData names(8): name time ... batch condition
summary_dt_leg <-
colSums(assay(dds_leg)) %>%
tibble::enframe(name = "sample", value = "counts")
summary_dt_leg
summary_dt_trunk <-
colSums(assay(dds_trunk)) %>%
tibble::enframe(name = "sample", value = "counts")
summary_dt_trunk
scaled_tpm_leg <- read_tsv("leg_engraftment_scaled_tpm.tsv")
Parsed with column specification:
cols(
gene_id = [31mcol_character()[39m,
yng_leg_t0_1 = [32mcol_double()[39m,
yng_leg_t0_aug = [32mcol_double()[39m,
yng_leg_t0_3 = [32mcol_double()[39m,
yng_leg_t21_1 = [32mcol_double()[39m,
yng_leg_t21_aug = [32mcol_double()[39m,
old_leg_t0_396_1 = [32mcol_double()[39m,
old_leg_t0_396_2 = [32mcol_double()[39m,
old_leg_t21_396_1 = [32mcol_double()[39m,
old_leg_t21_396_2 = [32mcol_double()[39m
)
scaled_tpm_trunk <- read_tsv("trunk_engraftment_scaled_tpm.tsv")
Parsed with column specification:
cols(
gene_id = [31mcol_character()[39m,
yng_trunk_t0_2 = [32mcol_double()[39m,
yng_trunk_t0_3 = [32mcol_double()[39m,
yng_trunk_t21_2 = [32mcol_double()[39m,
yng_trunk_t21_3 = [32mcol_double()[39m,
old_trunk_t0_88A = [32mcol_double()[39m,
old_trunk_t0_89A = [32mcol_double()[39m,
old_trunk_t0_94A = [32mcol_double()[39m,
old_trunk_t21_88A = [32mcol_double()[39m,
old_trunk_t21_89A_tr1 = [32mcol_double()[39m,
old_trunk_t21_89A_tr2 = [32mcol_double()[39m,
old_trunk_t21_94A_br1 = [32mcol_double()[39m,
old_trunk_t21_94A_br2 = [32mcol_double()[39m
)
scaled_tpm <-
full_join(scaled_tpm_leg,
scaled_tpm_trunk,
"gene_id")
set.seed(420)
gpca <- glmpca(scaled_tpm[, 2:22] %>%
dplyr::filter(rowSums(.[1:21]) > 0), L = 2)
gpca.dat <- gpca$factors
gpca.dat <- cbind(gpca.dat, meta_dt[, c("age", "tissue", "time", "batch", "donor")])
gpca.dat
gg_pca <-
ggplot(
gpca.dat %>% as_tibble(rownames = "sample") %>%
mutate(condition = paste(age, time, sep = "_")),
aes(x = dim1, y = dim2, color = condition, shape = batch, label = sample)
) +
geom_point(size = 4) +
geom_line(aes(group = donor), colour = "grey") +
# coord_fixed() +
geom_text(check_overlap = T, angle = 0, size = 3, vjust = 2, nudge_y = 1.5) +
# facet_grid(~tissue) +
ggtitle("glmpca - Generalized PCA")
gg_pca
ggsave(file.path(figures_dir, "glmpca_bulk_rnaseq.pdf"), gg_pca)
Saving 12 x 7.41 in image
dds_leg <- DESeq(dds_leg, test = "LRT" , reduced = ~1)
estimating size factors
using 'avgTxLength' from assays(dds), correcting for library size
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
plotDispEsts(dds_leg)
dds_trunk <- DESeq(dds_trunk, test = "LRT" , reduced = ~1)
estimating size factors
using 'avgTxLength' from assays(dds), correcting for library size
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
plotDispEsts(dds_trunk)
lfc_leg <- get_lfc_estimates(dds_leg)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
thresholding s-values on alpha=0.005 to color points
thresholding s-values on alpha=0.005 to color points
out of 16228 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 345, 2.1%
LFC < -1.00 (down) : 1366, 8.4%
out of 16228 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 622, 3.8%
LFC < -1.00 (down) : 129, 0.79%
out of 16228 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 583, 3.6%
LFC < -1.00 (down) : 316, 1.9%
lfc_trunk<- get_lfc_estimates(dds_trunk)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
using 'apeglm' for LFC shrinkage. If used in published research, please cite:
Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
sequence count data: removing the noise and preserving large differences.
Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
computing FSOS 'false sign or small' s-values (T=1)
thresholding s-values on alpha=0.005 to color points
thresholding s-values on alpha=0.005 to color points
out of 15950 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 500, 3.1%
LFC < -1.00 (down) : 183, 1.1%
out of 15950 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 298, 1.9%
LFC < -1.00 (down) : 40, 0.25%
out of 15950 with nonzero total read count
s-value < 0.05
LFC > 1.00 (up) : 421, 2.6%
LFC < -1.00 (down) : 536, 3.4%
res_lfc <-
full_join(lfc_leg,
lfc_trunk,
by = "gene_id",
suffix = c("_leg", "_trunk")
)
res_lfc <-
res_lfc %>%
left_join(., gene_metadata, by = "gene_id") %>%
dplyr::select(gene_name, everything()) %>%
mutate(gene_name=
scater::uniquifyFeatureNames(
ID= gene_id,
names=gene_name))
res_lfc
table(res_lfc$EAN_leg)
000 001 010 011 100 101 110 111
13868 1048 351 210 258 155 40 298
table(res_lfc$EAN_trunk)
000 001 010 011 100 101 110 111
14336 399 689 188 206 52 36 44
table(res_lfc$EAN_trunk, res_lfc$EAN_leg)
000 001 010 011 100 101 110 111
000 11800 859 198 149 158 133 31 241
001 326 25 10 5 23 0 0 1
010 542 21 98 11 2 1 2 4
011 144 8 10 13 7 1 1 2
100 109 9 5 2 37 6 3 30
101 25 5 0 2 17 1 2 0
110 27 0 7 1 1 0 0 0
111 28 1 5 0 3 2 0 4
write_tsv(res_lfc, "leg_trunk_moderated_lfc_1_svalue_05_marked.txt")
svalue_thresh <- 0.05
plot_volcano(res_lfc, "lfc_niche_leg", "svalue_niche_leg", "niche_leg", svalue_thresh)
Saving 7 x 7 in image
plot_volcano(res_lfc, "lfc_age_leg", "svalue_age_leg", "age_leg", svalue_thresh)
Saving 10 x 8 in image
plot_volcano(res_lfc, "lfc_engraf_leg", "svalue_engraf_leg", "engraftment_leg", svalue_thresh)
Saving 10 x 8 in image
plot_volcano(res_lfc, "lfc_niche_trunk", "svalue_niche_trunk", "niche_trunk", svalue_thresh)
Saving 7 x 7 in image
plot_volcano(res_lfc, "lfc_age_trunk", "svalue_age_trunk", "age_trunk", svalue_thresh)
One or more p-values is 0. Converting to 10^-1 * current lowest non-zero p-value...Saving 10 x 8 in image
plot_volcano(res_lfc, "lfc_engraf_trunk", "svalue_engraf_trunk", "engraftment_trunk", svalue_thresh)
Saving 10 x 8 in image
res_lfc_hits <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("001", "011", "101", "111", "100", "010", "110") |
EAN_trunk %in% c("001", "011", "101", "111", "100", "010", "110")) %>%
arrange(EAN_leg, EAN_trunk)
res_lfc_hits
write_tsv(res_lfc_hits, "leg_trunk_moderated_lfc_1_svalue_05_marked_hits.txt")
vsd_leg <-
vst(dds_leg,
blind = T
)
Is there a dependence of the standard deviation (or variance) on the mean ? If there is no variance-mean dependence, then the line should be approximately horizontal
meanSdPlot(assay(vsd_leg), ranks = F)
vsd_trunk <-
vst(dds_trunk,
blind = T
)
meanSdPlot(assay(vsd_trunk), ranks = F)
plot_pca(vsd_leg, "leg")
Saving 7 x 7 in image
plot_pca(vsd_trunk, "trunk")
Saving 7 x 7 in image
target_genes_leg_001 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("001")) %>%
arrange(svalue_niche_leg)
target_genes_leg_001
target_genes_leg_011 <-
res_lfc%>%
dplyr::filter(EAN_leg %in% c("011")) %>%
arrange(-base_mean_leg)
target_genes_leg_011
target_genes_leg_101 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("101"))
target_genes_leg_101
target_genes_leg_111 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("111"))
target_genes_leg_111
target_genes_leg_100 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("100"))
target_genes_leg_100
target_genes_leg_010 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("010"))
target_genes_leg_010
target_genes_leg_110 <-
res_lfc %>%
dplyr::filter(EAN_leg %in% c("110"))
target_genes_leg_110
target_genes_trunk_001 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("001"))
target_genes_trunk_001
target_genes_trunk_011 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("011"))
target_genes_trunk_011
target_genes_trunk_101 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("101"))
target_genes_trunk_101
target_genes_trunk_111 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("111"))
target_genes_trunk_111
target_genes_trunk_100 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("100"))
target_genes_trunk_100
target_genes_trunk_010 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("010"))
target_genes_trunk_010
target_genes_trunk_110 <-
res_lfc %>%
dplyr::filter(EAN_trunk%in% c("110"))
target_genes_trunk_110
meta_df_leg <-
as.data.frame(colData(dds_leg)[, c("age", "time")])
dt_plot_leg <-
scaled_tpm_leg %>%
mutate_if(is.numeric,~ log2( .+1))
plot_heatmap(target_genes_leg_001, title = "log2_scaled_tpm_leg_001", meta_df_leg, dt_plot_leg)
plot_heatmap(target_genes_leg_011, title = "log2_scaled_tpm_leg_011", meta_df_leg, dt_plot_leg, show_genes = T)
plot_heatmap(target_genes_leg_101, title = "log2_scaled_tpm_leg_101", meta_df_leg, dt_plot_leg, show_genes = T)
plot_heatmap(target_genes_leg_111, title = "log2_scaled_tpm_leg_111", meta_df_leg, dt_plot_leg, show_genes = T)
plot_heatmap(target_genes_leg_100, title = "log2_scaled_tpm_leg_100", meta_df_leg, dt_plot_leg)
plot_heatmap(target_genes_leg_010, title = "log2_scaled_tpm_leg_010", meta_df_leg, dt_plot_leg)
plot_heatmap(target_genes_leg_110, title = "log2_scaled_tpm_leg_110", meta_df_leg, dt_plot_leg, show_genes = T)
meta_df_trunk <-
as.data.frame(colData(dds_trunk)[, c("age", "time")])
dt_plot_trunk <-
scaled_tpm_trunk %>%
mutate_if(is.numeric,~ log2( .+1))
plot_heatmap(target_genes_trunk_001, title = "log2_scaled_tpm_trunk_001", meta_df_trunk, dt_plot_trunk, show_genes = T)
plot_heatmap(target_genes_trunk_011, title = "log2_scaled_tpm_trunk_011", meta_df_trunk, dt_plot_trunk, show_genes = T)
plot_heatmap(target_genes_trunk_101, title = "log2_scaled_tpm_trunk_101", meta_df_trunk, dt_plot_trunk, show_genes = T)
plot_heatmap(target_genes_trunk_111, title = "log2_scaled_tpm_trunk_111", meta_df_trunk, dt_plot_trunk, show_genes = T)
plot_heatmap(target_genes_trunk_100, title = "log2_scaled_tpm_trunk_100", meta_df_trunk, dt_plot_trunk, show_genes = T)
plot_heatmap(target_genes_trunk_010, title = "log2_scaled_tpm_trunk_010", meta_df_trunk, dt_plot_trunk)
plot_heatmap(target_genes_trunk_110, title = "log2_scaled_tpm_trunk_110", meta_df_trunk, dt_plot_trunk, show_genes = T)
vsd_dt <-
full_join(
assay(vsd_leg) %>%
as_tibble(rownames = "gene_id"),
assay(vsd_trunk) %>%
as_tibble(rownames = "gene_id"),
by="gene_id"
)
vsd_dt_melted <-
vsd_dt %>%
# filter(gene_id %in% rowData(sce10x)$gene_id) %>%
gather(sample, vsd, -gene_id)
vsd_dt_melted <-
left_join(vsd_dt_melted,
meta_dt %>%
dplyr::select(-name, -type, -batch),
by = "sample"
)
vsd_dt_melted
scaled_tpm_melted <-
scaled_tpm %>%
gather(sample, tpm, -gene_id)
scaled_tpm_melted <-
left_join(scaled_tpm_melted,
meta_dt %>%
dplyr::select(-name, -type, -batch),
by = "sample"
)
scaled_tpm_melted <-
left_join(scaled_tpm_melted,
gene_metadata %>%
dplyr::select(gene_name, gene_id),
by = "gene_id"
) %>%
mutate(log2_scaled_tpm = log2(tpm+1))
plot_dt <-
scaled_tpm %>%
dplyr::select(-gene_id) %>%
mutate_if(is.numeric, ~ log2(.+1))
df <-
meta_dt[, c("sample", "tissue", "age", "time")] %>%
column_to_rownames("sample")
select <- order(rowMeans(plot_dt %>% as.matrix()),
decreasing = TRUE
)[1:5000]
g_h2 <-
pheatmap(plot_dt[select, ],
cluster_rows = T,
show_rownames = FALSE,
cluster_cols = FALSE,
annotation_col = df
)
g_h2
ggsave(file.path(figures_dir, "heatmap_bulk_rnaseq_log2_scaled_tpm_5000genes.pdf"), g_h2)
Saving 10 x 18 in image
plot_gene("ENSMUSG00000026043", scaled_tpm_melted)
Saving 7 x 7 in image
p1<- ggplot(res_lfc,
aes(x = lfc_age_leg, y = lfc_niche_leg)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "niche_vs_age_lfc_leg.pdf"), p1)
Saving 7 x 7 in image
p1
p2<- ggplot(res_lfc,
aes(x = lfc_age_leg, y = lfc_engraf_leg)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "engraftment_vs_age_lfc_leg.pdf"), p2)
Saving 7 x 7 in image
p2
p3<- ggplot(res_lfc,
aes(x = lfc_niche_leg, y = lfc_engraf_leg)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "engraftment_vs_niche_lfc_leg.pdf"), p3)
Saving 7 x 7 in image
p3
p4<- ggplot(res_lfc,
aes(x = lfc_age_trunk, y = lfc_niche_trunk)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "niche_vs_age_lfc_trunk.pdf"), p4)
Saving 7 x 7 in image
p4
p5<- ggplot(res_lfc,
aes(x = lfc_age_trunk, y = lfc_engraf_trunk)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "engraftment_vs_age_lfc_trunk.pdf"), p5)
Saving 7 x 7 in image
p5
p6<- ggplot(res_lfc,
aes(x = lfc_niche_trunk, y = lfc_engraf_trunk)
) +
geom_point(size = .7, alpha=0.3)
ggsave(file.path(figures_dir, "engraftment_vs_niche_lfc_trunk.pdf"), p6)
Saving 7 x 7 in image
p6
p3<- ggplot(res_lfc,
aes(x = age_leg, y = engraf_leg)
) +
geom_point(size = .5, alpha=0.1)
p3
sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] ensembldb_2.10.2 AnnotationFilter_1.10.0 GenomicFeatures_1.38.2 AnnotationDbi_1.48.0
[5] AnnotationHub_2.18.0 BiocFileCache_1.10.2 dbplyr_1.4.2 pheatmap_1.0.12
[9] EnhancedVolcano_1.4.0 ggrepel_0.8.2 apeglm_1.8.0 glmpca_0.1.0
[13] cowplot_1.0.0 vsn_3.54.0 DESeq2_1.26.0 SummarizedExperiment_1.16.1
[17] DelayedArray_0.12.2 BiocParallel_1.20.1 matrixStats_0.55.0 Biobase_2.46.0
[21] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0 IRanges_2.20.2 S4Vectors_0.24.3
[25] BiocGenerics_0.32.0 tximport_1.14.0 forcats_0.5.0 stringr_1.4.0
[29] dplyr_0.8.5 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2
[33] tibble_2.1.3 ggplot2_3.3.0 tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] tidyselect_1.0.0 RSQLite_2.2.0 htmlwidgets_1.5.1 grid_3.6.3
[5] munsell_0.5.0 preprocessCore_1.48.0 withr_2.1.2 colorspace_1.4-1
[9] knitr_1.28 rstudioapi_0.11 SingleCellExperiment_1.8.0 labeling_0.3
[13] bbmle_1.0.23.1 GenomeInfoDbData_1.2.2 bit64_0.9-7 farver_2.0.3
[17] rhdf5_2.30.1 rprojroot_1.3-2 coda_0.19-3 vctrs_0.2.4
[21] generics_0.0.2 xfun_0.12 R6_2.4.1 ggbeeswarm_0.6.0
[25] rsvd_1.0.3 locfit_1.5-9.1 bitops_1.0-6 assertthat_0.2.1
[29] promises_1.1.0 scales_1.1.0 nnet_7.3-13 beeswarm_0.2.3
[33] gtable_0.3.0 affy_1.64.0 rlang_0.4.5 genefilter_1.68.0
[37] splines_3.6.3 rtracklayer_1.46.0 lazyeval_0.2.2 acepack_1.4.1
[41] hexbin_1.28.1 broom_0.5.5 checkmate_2.0.0 BiocManager_1.30.10
[45] yaml_2.2.1 modelr_0.1.6 backports_1.1.5 httpuv_1.5.2
[49] Hmisc_4.3-1 tools_3.6.3 affyio_1.56.0 ellipsis_0.3.0
[53] RColorBrewer_1.1-2 Rcpp_1.0.3 plyr_1.8.6 base64enc_0.1-3
[57] progress_1.2.2 zlibbioc_1.32.0 RCurl_1.98-1.1 prettyunits_1.1.1
[61] rpart_4.1-15 openssl_1.4.1 viridis_0.5.1 haven_2.2.0
[65] cluster_2.1.0 fs_1.3.2 magrittr_1.5 data.table_1.12.8
[69] reprex_0.3.0 mvtnorm_1.1-0 ProtGenerics_1.18.0 hms_0.5.3
[73] mime_0.9 evaluate_0.14 xtable_1.8-4 XML_3.99-0.3
[77] emdbook_1.3.12 jpeg_0.1-8.1 readxl_1.3.1 gridExtra_2.3
[81] compiler_3.6.3 biomaRt_2.42.0 bdsmatrix_1.3-4 scater_1.14.6
[85] crayon_1.3.4 htmltools_0.4.0 later_1.0.0 Formula_1.2-3
[89] geneplotter_1.64.0 lubridate_1.7.4 DBI_1.1.0 MASS_7.3-51.5
[93] rappdirs_0.3.1 Matrix_1.2-18 cli_2.0.2 pkgconfig_2.0.3
[97] GenomicAlignments_1.22.1 numDeriv_2016.8-1.1 foreign_0.8-75 xml2_1.2.4
[101] annotate_1.64.0 vipor_0.4.5 XVector_0.26.0 rvest_0.3.5
[105] digest_0.6.25 Biostrings_2.54.0 rmarkdown_2.1 cellranger_1.1.0
[109] htmlTable_1.13.3 DelayedMatrixStats_1.8.0 curl_4.3 shiny_1.4.0
[113] Rsamtools_2.2.3 lifecycle_0.2.0 nlme_3.1-144 jsonlite_1.6.1
[117] Rhdf5lib_1.8.0 BiocNeighbors_1.4.2 viridisLite_0.3.0 askpass_1.1
[121] limma_3.42.2 fansi_0.4.1 pillar_1.4.3 lattice_0.20-40
[125] fastmap_1.0.1 httr_1.4.1 survival_3.1-8 interactiveDisplayBase_1.24.0
[129] glue_1.3.1 png_0.1-7 BiocVersion_3.10.1 bit_1.1-15.2
[133] stringi_1.4.6 blob_1.2.1 BiocSingular_1.2.2 latticeExtra_0.6-29
[137] memoise_1.1.0 irlba_2.3.3