Cluster cell types
Cluster young cells
sce10x_yng <- sce10x[, colData(sce10x)$condition == "yng"]
n_exprs_genes_yng <-
nexprs(sce10x_yng,
detection_limit = 5,
byrow = TRUE
)
table(n_exprs_genes_yng)[1:20]
n_exprs_genes_yng
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
24696 1436 778 467 333 269 236 200 175 125 150 129 111 111 112 84 71 88
18 19
68 74
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_yng_tumap <-
tumap(t(assay(sce10x_yng[n_exprs_genes_yng >= 3, ], "counts") %>%
as.matrix()),
metric = "cosine",
ret_nn = TRUE
)
set.seed(42)
sce10x_umap_yng <-
umap(t(assay(
sce10x_yng[n_exprs_genes_yng >= 3, ],
"counts"
) %>%
as.matrix()),
metric = "cosine",
nn_method = sce10x_yng_tumap$nn,
a = .6,
b = .55
)
colnames(sce10x_umap_yng) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_yng, "umap") <- sce10x_umap_yng
dt <-
bind_cols(
colData(sce10x_yng) %>%
as_tibble(),
sce10x_umap_yng %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = markers
),
size = 3,
alpha = 1
)
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
set.seed(42)
g <- buildSNNGraph(sce10x_yng, k = 20, use.dimred = "umap")
clusters_out <- igraph::cluster_walktrap(g)
# colLabels(sce10x) <- factor(igraph::cluster_louvain(g)$membership)
table(clusters_out$membership)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
147 72 354 370 147 434 88 90 93 87 272 236 250 176 214 404 146 295 493 110 238 151 172 98 69 96 233
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
162 78 74 81 63 133 144 62 67 102 61 55 83 64 42 41 38 24 26 29 46 23 35 45 21 38 31
55
32
igraph_clusters <- igraph::cut_at(clusters_out, n = 16)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
404 109 1535 173 1056 1848 890 122 483 291 88 87 41 45 31 32
cluster_by_marker <-
table(igraph_clusters, colData(sce10x_yng)$markers)
cluster_by_marker
igraph_clusters fap macrophage none stem
1 0 402 0 2
2 0 109 0 0
3 1457 3 64 11
4 6 17 126 24
5 0 3 58 995
6 5 8 146 1689
7 7 3 68 812
8 15 0 99 8
9 0 480 2 1
10 1 0 18 272
11 1 1 25 61
12 0 1 8 78
13 4 5 22 10
14 2 3 39 1
15 2 2 27 0
16 0 1 24 7
labels <- apply(cluster_by_marker, 1, which.max)
level_key_1 <- colnames(cluster_by_marker)[labels]
names(level_key_1) <- names(labels)
level_key_1
1 2 3 4 5 6 7 8
"macrophage" "macrophage" "fap" "none" "stem" "stem" "stem" "none"
9 10 11 12 13 14 15 16
"macrophage" "stem" "stem" "stem" "none" "none" "none" "none"
table(level_key_1)
level_key_1
fap macrophage none stem
1 3 6 6
level_key_2 <- paste(level_key_1, names(level_key_1), sep = "_")
names(level_key_2) <- names(labels)
colData(sce10x_yng)$clusters_level1 <- recode(igraph_clusters, !!!level_key_1)
colData(sce10x_yng)$clusters_level2 <- recode(igraph_clusters, !!!level_key_2)
table(colData(sce10x_yng)$clusters_level1, colData(sce10x_yng)$sample)
yng1 yng2 yng3
fap 502 393 640
macrophage 289 289 418
none 185 158 101
stem 1261 1936 1063
table(colData(sce10x_yng)$clusters_level2, colData(sce10x_yng)$sample)
yng1 yng2 yng3
fap_3 502 393 640
macrophage_1 107 146 151
macrophage_2 9 31 69
macrophage_9 173 112 198
none_13 15 8 18
none_14 14 22 9
none_15 23 3 5
none_16 0 0 32
none_4 96 74 3
none_8 37 51 34
stem_10 16 274 1
stem_11 0 0 88
stem_12 60 17 10
stem_5 111 865 80
stem_6 1066 766 16
stem_7 8 14 868
dt <-
bind_cols(
colData(sce10x_yng) %>%
as_tibble(),
sce10x_umap_yng %>%
as_tibble()
)
p3 <-
ggplot(dt, aes(umap1,
umap2,
colour = subsets_mitotic_detected >1
)) +
geom_jitter(
width = .1,
height = .1,
size = 3,
alpha = 1
)
p3 + facet_wrap(~clusters_level2)
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
colData(sce10x_yng)$clusters_level1[colData(sce10x_yng)$clusters_level2 =="stem_12"] <- "stem_mitotic"
dt <-
bind_cols(
colData(sce10x_yng) %>%
as_tibble(),
sce10x_umap_yng %>%
as_tibble()
)
p3 <-
ggplot(dt, aes(umap1,
umap2,
colour = clusters_level1
)) +
geom_jitter(
width = .1,
height = .1,
size = 3,
alpha = 1
)
p3
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
Cluster aged cells
sce10x_aged <- sce10x[, colData(sce10x)$condition == "aged"]
n_exprs_genes_aged <-
nexprs(sce10x_aged,
detection_limit = 5,
byrow = TRUE
)
table(n_exprs_genes_aged)[1:20]
n_exprs_genes_aged
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
25860 1574 714 507 365 253 220 195 175 129 137 108 122 90 80 71 71 83
18 19
66 71
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_aged_tumap <-
tumap(t(assay(
sce10x_aged[n_exprs_genes_aged >= 3, ],
"counts"
) %>%
as.matrix()),
metric = "cosine",
ret_nn = TRUE
)
set.seed(42)
sce10x_umap_aged <-
umap(t(assay(
sce10x_aged[n_exprs_genes_aged >= 3, ],
"counts"
) %>%
as.matrix()),
metric = "cosine",
nn_method = sce10x_aged_tumap$nn,
a = .6,
b = .6
)
colnames(sce10x_umap_aged) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_aged, "umap") <- sce10x_umap_aged
dt <-
bind_cols(
colData(sce10x_aged) %>%
as_tibble(),
sce10x_umap_aged %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = markers
),
size = 1,
alpha = 1
)
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
set.seed(42)
g <- buildSNNGraph(sce10x_aged, k = 20, use.dimred = "umap")
clusters_out <- igraph::cluster_walktrap(g)
# colLabels(sce10x) <- factor(igraph::cluster_louvain(g)$membership)
table(clusters_out$membership)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
788 319 135 715 84 321 239 407 740 58 353 157 191 474 837 286 122 105 325 506 422 200 236 123 279 116 160
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
69 203 187 92 49 54 97 57 106 93 64 180 59 48 67 33 55 68 34 65 41 28 40 56 45 44 37
55
24
igraph_clusters <- igraph::cut_at(clusters_out, n = 9)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5 6 7 8 9
6639 131 343 674 2335 157 82 295 37
cluster_by_marker <-
table(igraph_clusters, colData(sce10x_aged)$markers)
cluster_by_marker
igraph_clusters fap macrophage none stem
1 5865 37 710 27
2 0 127 3 1
3 6 3 332 2
4 6 664 3 1
5 14 20 694 1607
6 10 6 136 5
7 3 2 74 3
8 63 5 224 3
9 32 0 5 0
labels <- apply(cluster_by_marker, 1, which.max)
level_key_1 <- colnames(cluster_by_marker)[labels]
names(level_key_1) <- names(labels)
level_key_1
1 2 3 4 5 6 7 8
"fap" "macrophage" "none" "macrophage" "stem" "none" "none" "none"
9
"fap"
table(level_key_1)
level_key_1
fap macrophage none stem
2 2 4 1
level_key_2 <- paste(level_key_1, names(level_key_1), sep = "_")
names(level_key_2) <- names(labels)
level_key_2
1 2 3 4 5 6 7
"fap_1" "macrophage_2" "none_3" "macrophage_4" "stem_5" "none_6" "none_7"
8 9
"none_8" "fap_9"
colData(sce10x_aged)$clusters_level1 <- recode(igraph_clusters, !!!level_key_1)
colData(sce10x_aged)$clusters_level2 <- recode(igraph_clusters, !!!level_key_2)
table(colData(sce10x_aged)$clusters_level1, colData(sce10x_aged)$sample)
aged1 aged2 aged3 aged4
fap 2027 2022 1583 1044
macrophage 65 398 149 193
none 168 345 143 221
stem 545 692 337 761
table(colData(sce10x_aged)$clusters_level2, colData(sce10x_aged)$sample)
aged1 aged2 aged3 aged4
fap_1 1993 2021 1582 1043
fap_9 34 1 1 1
macrophage_2 0 128 0 3
macrophage_4 65 270 149 190
none_3 70 99 100 74
none_6 36 85 10 26
none_7 11 27 8 36
none_8 51 134 25 85
stem_5 545 692 337 761
dt <-
bind_cols(
colData(sce10x_aged) %>%
as_tibble(),
sce10x_umap_aged %>%
as_tibble()
)
p3 <-
ggplot(dt, aes(umap1,
umap2,
colour = clusters_level1
)) +
geom_jitter(
width = .1,
height = .1,
size = .7,
alpha = 1
)
p3 #+facet_wrap(~clusters_level2)
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
Save data
meta_dt <-
rbind(colData(sce10x_aged), colData(sce10x_yng))
colData(sce10x)$cluster_level2 <- paste(meta_dt$condition, meta_dt$clusters_level1, sep="_")
colData(sce10x) <- meta_dt
saveRDS(sce10x,
file.path(
data_dir,
"preprocessed",
"sce10x_filtered_cells.rds"))
Normalize and correct for batch effects
Discard non-marked and mitotic cells
sce10x <- sce10x[, colData(sce10x)$clusters_level1 %in% c("macrophage", "fap", "stem") ]
n_exprs_genes <-
nexprs(sce10x,
detection_limit = 0,
byrow = TRUE
)
table(n_exprs_genes)[1:20]
n_exprs_genes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
981 2716 1434 1055 759 591 471 444 357 357 336 333 285 252 226 239 231 190 209 167
sce10x <- sce10x[n_exprs_genes >0,]
sce10x
class: SingleCellExperiment
dim: 33578 16520
metadata(42): tximetaInfo quantInfo ... txomeInfo txdbInfo
assays(3): counts spliced unspliced
rownames(33578): Gm1992 Gm6123 ... mt-Cytb mt-Nd6
rowData names(9): ensembl_gene_id_version external_gene_name ... nexprs_l1_s keep
colnames(16520): GACTATGTCCGGCTTT_d1 ATGGTTGGTTGTAAAG_d1 ... GTCCCTCTCCCTCTAG_g3
AAACGCTGTCGTTTAC_g3
colData names(22): sum detected ... clusters_level1 clusters_level2
reducedDimNames(0):
altExpNames(0):
colData(sce10x)$clusters_level2 <-
paste(colData(sce10x)$condition,
colData(sce10x)$clusters_level1, sep="_")
Normalize data across samples
sce10x <-
multiBatchNorm(sce10x,
batch = colData(sce10x)$sample
)
n_exprs_genes <-
nexprs(sce10x,
detection_limit = 5,
byrow = TRUE
)
table(n_exprs_genes)[1:20]
n_exprs_genes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
23438 1596 722 483 357 280 220 191 166 164 128 134 95 88 121 68 73 80
18 19
67 71
Run fast mutual nearest neighbors correction
sce10x_corrected <-
fastMNN(sce10x,
batch = colData(sce10x)$sample,
k = 20,
d = 200,
subset.row = names(n_exprs_genes)[n_exprs_genes >= 3]
)
reducedDim(sce10x, "mnn_corrected") <- reducedDim(sce10x_corrected, "corrected")
Run UMAP
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_tumap <-
tumap(reducedDim(sce10x,
"mnn_corrected"),
metric = "cosine",
ret_nn = TRUE)
set.seed(42)
sce10x_umap <-
umap(reducedDim(sce10x,
"mnn_corrected"),
metric = "cosine",
nn_method = sce10x_tumap$nn,
a = .8,
b = .55
)
colnames(sce10x_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x, "mnn_umap") <- sce10x_umap
dt <-
bind_cols(
colData(sce10x) %>%
as_tibble(),
sce10x_umap %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters_level1
),
size = .6,
alpha = 1
) #+ facet_wrap(~condition)
p2
ggsave(file.path(figures_dir, "mnn_umap.pdf"), p2 )
Saving 14 x 8 in image
Cluster stem cells
set.seed(42)
g <- buildSNNGraph(sce10x, k = 100, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_stem_tumap <-
tumap(reducedDim(sce10x_stem,
"mnn_corrected"),
metric = "cosine",
ret_nn = TRUE)
set.seed(42)
sce10x_stem_umap <-
umap(reducedDim(sce10x_stem,
"mnn_corrected"),
metric = "cosine",
nn_method = sce10x_stem_tumap$nn,
a = .85,
b = .43
)
colnames(sce10x_stem_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_stem, "mnn_umap") <- sce10x_stem_umap
dt <-
bind_cols(
colData(sce10x_stem) %>%
as_tibble(),
sce10x_stem_umap %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters_level2
),
size = 1,
alpha =.6
)
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
set.seed(42)
g <- buildSNNGraph(sce10x_stem, k = 100, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5 6 7 8 9 10 11 12 13 14
433 711 773 430 912 852 464 386 234 433 188 157 290 245
igraph_clusters <- igraph::cut_at(clusters_out, n = 5)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5
1931 621 2043 1061 852
discard_cell <- sce10x_stem_umap[,1] < -10 | sce10x_stem_umap[,1] > 22 | (igraph_clusters ==2 & colData(sce10x_stem)$condition=="aged")
table(discard_cell)
discard_cell
FALSE TRUE
6429 79
igraph_clusters[discard_cell] <- 6
cluster_by_marker <-
table(igraph_clusters, colData(sce10x_stem)$sample)
cluster_by_marker
igraph_clusters aged1 aged2 aged3 aged4 yng1 yng2 yng3
1 20 86 52 85 495 902 291
2 0 0 0 0 250 220 126
3 98 178 79 196 356 606 530
4 135 214 102 255 68 141 92
5 268 204 93 203 30 44 10
6 24 10 11 22 2 6 4
colData(sce10x_stem)$clusters <- factor(igraph_clusters)
sce10x_stem_subset <-
sce10x_stem[, colData(sce10x_stem)$clusters!=6]
dt <-
bind_cols(
colData(sce10x_stem_subset) %>%
as_tibble(),
reducedDim(sce10x_stem_subset, "mnn_umap") %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters
),
size = 1.7,
alpha = 1
)
p2 # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_stem.pdf"), p2 )
Saving 14 x 10 in image
Cluster fap cells
sce10x_fap <- sce10x[,colData(sce10x)$clusters_level1 =="fap"]
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_fap_tumap <-
tumap(reducedDim(sce10x_fap,
"mnn_corrected"),
metric = "cosine",
ret_nn = TRUE)
set.seed(42)
sce10x_fap_umap <-
umap(reducedDim(sce10x_fap,
"mnn_corrected"),
metric = "cosine",
nn_method = sce10x_fap_tumap$nn,
a = .9,
b = .78
)
colnames(sce10x_fap_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_fap, "mnn_umap") <- sce10x_fap_umap
dt <-
bind_cols(
colData(sce10x_fap) %>%
as_tibble(),
sce10x_fap_umap %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters_level2
),
size = 1,
alpha =.6
)
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
set.seed(42)
g <- buildSNNGraph(sce10x_fap, k = 100, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5 6 7 8 9 10
1217 889 1246 664 1710 1383 413 304 239 146
igraph_clusters <- igraph::cut_at(clusters_out, n = 3)
table(igraph_clusters)
igraph_clusters
1 2 3
6102 1963 146
discard_cell <- sce10x_fap_umap[,1] > 5
table(discard_cell)
discard_cell
FALSE TRUE
8149 62
igraph_clusters[discard_cell] <- 4
cluster_by_marker <-
table(igraph_clusters, colData(sce10x_fap)$sample)
cluster_by_marker
igraph_clusters aged1 aged2 aged3 aged4 yng1 yng2 yng3
1 1608 1562 1120 824 289 273 364
2 369 419 434 189 196 107 249
3 43 28 14 14 13 10 24
4 7 13 15 17 4 3 3
colData(sce10x_fap)$clusters <- factor(igraph_clusters)
sce10x_fap_subset <-
sce10x_fap[, colData(sce10x_fap)$clusters!=4]
dt <-
bind_cols(
colData(sce10x_fap_subset) %>%
as_tibble(),
reducedDim(sce10x_fap_subset, "mnn_umap") %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters
),
size = 1.7,
alpha = 1
)
p2 # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_fap.pdf"), p2 )
Saving 14 x 10 in image
Cluster macrophage cells
sce10x_macrophage <- sce10x[,colData(sce10x)$clusters_level1 =="macrophage"]
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_macrophage_tumap <-
tumap(reducedDim(sce10x_macrophage,
"mnn_corrected"),
metric = "cosine",
ret_nn = TRUE)
set.seed(42)
sce10x_macrophage_umap <-
umap(reducedDim(sce10x_macrophage,
"mnn_corrected"),
metric = "cosine",
nn_method = sce10x_macrophage_tumap$nn,
a = 0.75,
b = 0.65
)
colnames(sce10x_macrophage_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_macrophage, "mnn_umap") <- sce10x_macrophage_umap
dt <-
bind_cols(
colData(sce10x_macrophage) %>%
as_tibble(),
sce10x_macrophage_umap %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters_level2
),
size = 1,
alpha =.6
)
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
set.seed(42)
g <- buildSNNGraph(sce10x_macrophage, k = 50, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5 6 7 8 9 10 11 12
224 215 253 193 246 158 108 73 93 78 62 98
igraph_clusters <- igraph::cut_at(clusters_out, n = 5)
table(igraph_clusters)
igraph_clusters
1 2 3 4 5
134 1156 305 108 98
cluster_by_marker <-
table(igraph_clusters, colData(sce10x_macrophage)$sample)
cluster_by_marker
igraph_clusters aged1 aged2 aged3 aged4 yng1 yng2 yng3
1 0 24 4 2 10 69 25
2 37 138 137 89 265 171 319
3 21 83 8 98 7 34 54
4 7 56 0 3 7 15 20
5 0 97 0 1 0 0 0
colData(sce10x_macrophage)$clusters <- factor(igraph_clusters)
sce10x_macrophage_subset <-
sce10x_macrophage[, colData(sce10x_macrophage)$clusters %in% c(1,2, 3)]
dt <-
bind_cols(
colData(sce10x_macrophage_subset) %>%
as_tibble(),
reducedDim(sce10x_macrophage_subset, "mnn_umap") %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = clusters
),
size = 1.7,
alpha = 1
)
p2 # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_macrophage.pdf"), p2 )
Saving 14 x 10 in image
Save data
meta_dt <-
rbind(colData(sce10x_stem_subset), colData(sce10x_fap_subset), colData(sce10x_macrophage_subset)) %>%
as_tibble(rownames = "cell") %>%
mutate(cluster_level1_1= paste(clusters_level1, clusters, sep="_" ),
cluster_level2_1= paste(clusters_level2, clusters, sep="_" )) %>%
select(cell, cluster_level1_1, cluster_level2_1 )
meta_dt
colData(sce10x) <-
left_join(colData(sce10x)%>%
as_tibble(rownames = "cell"),
meta_dt,
by="cell") %>%
replace_na(list(cluster_level2_1 = "discard", cluster_level1_1 = "discard")) %>%
select(-clusters) %>%
column_to_rownames("cell") %>%
DataFrame(.)
discard_idx <-
colData(sce10x)$cluster_level2_1 == "discard"
table(discard_idx)
discard_idx
FALSE TRUE
16173 347
sce10x <- sce10x[, !discard_idx]
n_exprs_genes <-
nexprs(sce10x,
detection_limit = 0,
byrow = TRUE
)
table(n_exprs_genes)[1:20]
n_exprs_genes
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
104 2706 1447 1072 738 601 471 433 372 344 344 329 283 236 233 239 218 205 198 171
sce10x <- sce10x[n_exprs_genes>0, ]
saveRDS(
sce10x,
file.path(
data_dir,
"preprocessed",
"sce10x_filtered_final.rds"
)
)
make plots
dt <-
bind_cols(
colData(sce10x) %>%
as_tibble(),
reducedDim(sce10x, "mnn_umap") %>%
as_tibble()
)
p2 <-
ggplot(dt) +
geom_point(aes(umap1,
umap2,
colour = cluster_level1_1
),
size = .6,
alpha = 1
) + facet_wrap(~condition)
p2
ggsave(file.path(figures_dir, "mnn_umap_split.pdf"), p2 )
Saving 14 x 8 in image
sessionInfo()
R version 4.0.0 (2020-04-24)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
[4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] BiocSingular_1.4.0 batchelor_1.4.0 dbscan_1.1-5
[4] uwot_0.1.8 Matrix_1.2-18 forcats_0.5.0
[7] stringr_1.4.0 dplyr_0.8.5 purrr_0.3.4
[10] readr_1.3.1 tidyr_1.1.0 tibble_3.0.1
[13] tidyverse_1.3.0 scran_1.16.0 scater_1.16.0
[16] ggplot2_3.3.0 SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.1
[19] DelayedArray_0.14.0 matrixStats_0.56.0 Biobase_2.48.0
[22] GenomicRanges_1.40.0 GenomeInfoDb_1.24.0 IRanges_2.22.2
[25] S4Vectors_0.26.1 BiocGenerics_0.34.0
loaded via a namespace (and not attached):
[1] nlme_3.1-147 fs_1.4.1 bitops_1.0-6 lubridate_1.7.8
[5] RcppAnnoy_0.0.16 httr_1.4.1 rprojroot_1.3-2 tools_4.0.0
[9] backports_1.1.7 utf8_1.1.4 R6_2.4.1 irlba_2.3.3
[13] vipor_0.4.5 DBI_1.1.0 colorspace_1.4-1 withr_2.2.0
[17] tidyselect_1.1.0 gridExtra_2.3 compiler_4.0.0 cli_2.0.2
[21] rvest_0.3.5 BiocNeighbors_1.6.0 xml2_1.3.2 labeling_0.3
[25] scales_1.1.1 digest_0.6.25 rmarkdown_2.1 XVector_0.28.0
[29] base64enc_0.1-3 pkgconfig_2.0.3 htmltools_0.4.0 dbplyr_1.4.4
[33] limma_3.44.1 rlang_0.4.6 readxl_1.3.1 rstudioapi_0.11
[37] DelayedMatrixStats_1.10.0 farver_2.0.3 generics_0.0.2 jsonlite_1.6.1
[41] BiocParallel_1.22.0 RCurl_1.98-1.2 magrittr_1.5 GenomeInfoDbData_1.2.3
[45] fansi_0.4.1 Rcpp_1.0.4.6 ggbeeswarm_0.6.0 munsell_0.5.0
[49] viridis_0.5.1 lifecycle_0.2.0 stringi_1.4.6 yaml_2.2.1
[53] edgeR_3.30.0 zlibbioc_1.34.0 grid_4.0.0 blob_1.2.1
[57] dqrng_0.2.1 crayon_1.3.4 lattice_0.20-41 haven_2.3.0
[61] hms_0.5.3 locfit_1.5-9.4 knitr_1.28 pillar_1.4.4
[65] igraph_1.2.5 codetools_0.2-16 reprex_0.3.0 glue_1.4.1
[69] evaluate_0.14 modelr_0.1.8 vctrs_0.3.0 cellranger_1.1.0
[73] gtable_0.3.0 assertthat_0.2.1 xfun_0.14 rsvd_1.0.3
[77] broom_0.5.6 RSpectra_0.16-0 viridisLite_0.3.0 beeswarm_0.2.3
[81] statmod_1.4.34 ellipsis_0.3.1
---
title: "Mouse Muscle Stem Cell Project "
subtitle: "Part 3: cluster cells"
author: 
- name: Rick Farouni
  affiliation:
  - &cruk Génome Québec Innovation Centre, McGill University, Montreal, Canada
date: '`r format(Sys.Date(), "%Y-%B-%d")`'
output:
  html_notebook:
    df_print: paged
    code_folding: show
    toc: no
    toc_float: 
      collapsed: false
      smooth_scroll: false
---



# Prepare analysis workflow

## Set filepaths and parameters

```{r setup}
set.seed(42)
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
options(
  readr.show_progress = FALSE,
  digits = 2
)
```

## Load packages
```{r}
suppressPackageStartupMessages({
  library(scater)
  library(scran)
  library(tidyverse)
  library(uwot) # For umap
  library(dbscan)
  library(batchelor)
  library(BiocSingular)
})
theme_set(theme_bw())
```

## Define file paths

```{r}
data_dir <- "./data"
figures_dir <- file.path("./figures", "qc_cells")
```


## Load data

```{r}
sce10x <-
  readRDS(file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_cells.rds"
  ))
```



```{r}
mitotic_dt<-
  perCellQCMetrics(
      sce10x,
      subsets = list(mitotic = which(rowData(sce10x)$gene_name %in% c("Ccna2", "Ccnb1", "Ccnb2"))),
      percent_top =NULL,
      exprs_values = c("counts"),
      flatten = TRUE
    ) %>%
    as_tibble() %>%
    dplyr::select(subsets_mitotic_sum, subsets_mitotic_detected, subsets_mitotic_percent)
mitotic_dt
```



```{r}
colData(sce10x) <-
  cbind(colData(sce10x), mitotic_dt)
```

# Cluster cell types

## Cluster young cells

```{r}
sce10x_yng <- sce10x[, colData(sce10x)$condition == "yng"]
```

```{r}
n_exprs_genes_yng <-
  nexprs(sce10x_yng,
    detection_limit = 5,
    byrow = TRUE
  )

table(n_exprs_genes_yng)[1:20]
```




```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_yng_tumap <-
  tumap(t(assay(sce10x_yng[n_exprs_genes_yng >= 3, ], "counts") %>%
    as.matrix()),
  metric = "cosine",
  ret_nn = TRUE
  )
```

```{r}
set.seed(42)
sce10x_umap_yng <-
  umap(t(assay(
    sce10x_yng[n_exprs_genes_yng >= 3, ],
    "counts"
  ) %>%
    as.matrix()),
  metric = "cosine",
  nn_method = sce10x_yng_tumap$nn,
  a = .6,
  b = .55
  )
colnames(sce10x_umap_yng) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_yng, "umap") <- sce10x_umap_yng
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_yng) %>%
      as_tibble(),
    sce10x_umap_yng %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = markers
  ),
  size = 3,
  alpha = 1
  )
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
```


```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_yng, k = 20, use.dimred = "umap")
```

```{r}
clusters_out <- igraph::cluster_walktrap(g)
# colLabels(sce10x) <- factor(igraph::cluster_louvain(g)$membership)
table(clusters_out$membership)
```


```{r}
igraph_clusters <- igraph::cut_at(clusters_out, n = 16)
table(igraph_clusters)
```


```{r}
cluster_by_marker <-
  table(igraph_clusters, colData(sce10x_yng)$markers)
cluster_by_marker
```



```{r}
labels <- apply(cluster_by_marker, 1, which.max)
level_key_1 <- colnames(cluster_by_marker)[labels]
names(level_key_1) <- names(labels)
level_key_1
```
```{r}
table(level_key_1)
```

```{r}
level_key_2 <- paste(level_key_1, names(level_key_1), sep = "_")
names(level_key_2) <- names(labels)
colData(sce10x_yng)$clusters_level1 <- recode(igraph_clusters, !!!level_key_1)
colData(sce10x_yng)$clusters_level2 <- recode(igraph_clusters, !!!level_key_2)
table(colData(sce10x_yng)$clusters_level1, colData(sce10x_yng)$sample)
```

```{r}
table(colData(sce10x_yng)$clusters_level2, colData(sce10x_yng)$sample)
```


```{r fig.height=12, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_yng) %>%
      as_tibble(),
    sce10x_umap_yng %>%
      as_tibble()
  )
p3 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = subsets_mitotic_detected >1
  )) +
  geom_jitter(
    width = .1,
    height = .1,
    size = 3,
    alpha = 1
  )
p3 + facet_wrap(~clusters_level2)
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
```


```{r}
colData(sce10x_yng)$clusters_level1[colData(sce10x_yng)$clusters_level2 =="stem_12"] <- "stem_mitotic"
```


```{r fig.height=12, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_yng) %>%
      as_tibble(),
    sce10x_umap_yng %>%
      as_tibble()
  )
p3 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters_level1
  )) +
  geom_jitter(
    width = .1,
    height = .1,
    size = 3,
    alpha = 1
  )
p3 
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
```

## Cluster aged cells

```{r}
sce10x_aged <- sce10x[, colData(sce10x)$condition == "aged"]
```

```{r}
n_exprs_genes_aged <-
  nexprs(sce10x_aged,
    detection_limit = 5,
    byrow = TRUE
  )

table(n_exprs_genes_aged)[1:20]
```

```{r}
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_aged_tumap <-
  tumap(t(assay(
    sce10x_aged[n_exprs_genes_aged >= 3, ],
    "counts"
  ) %>%
    as.matrix()),
  metric = "cosine",
  ret_nn = TRUE
  )
```


```{r}
set.seed(42)
sce10x_umap_aged <-
  umap(t(assay(
    sce10x_aged[n_exprs_genes_aged >= 3, ],
    "counts"
  ) %>%
    as.matrix()),
  metric = "cosine",
  nn_method = sce10x_aged_tumap$nn,
  a = .6,
  b = .6
  )
colnames(sce10x_umap_aged) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_aged, "umap") <- sce10x_umap_aged
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_aged) %>%
      as_tibble(),
    sce10x_umap_aged %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = markers
  ),
  size = 1,
  alpha = 1
  )
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
```





```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_aged, k = 20, use.dimred = "umap")
```

```{r}
clusters_out <- igraph::cluster_walktrap(g)
# colLabels(sce10x) <- factor(igraph::cluster_louvain(g)$membership)
table(clusters_out$membership)
```

```{r}
igraph_clusters <- igraph::cut_at(clusters_out, n = 9)
table(igraph_clusters)
```


```{r}
cluster_by_marker <-
  table(igraph_clusters, colData(sce10x_aged)$markers)
cluster_by_marker
```



```{r}
labels <- apply(cluster_by_marker, 1, which.max)
level_key_1 <- colnames(cluster_by_marker)[labels]
names(level_key_1) <- names(labels)
level_key_1
```
```{r}
table(level_key_1)
```

```{r}
level_key_2 <- paste(level_key_1, names(level_key_1), sep = "_")
names(level_key_2) <- names(labels)
level_key_2
```

```{r}
colData(sce10x_aged)$clusters_level1 <- recode(igraph_clusters, !!!level_key_1)
colData(sce10x_aged)$clusters_level2 <- recode(igraph_clusters, !!!level_key_2)
```
```{r}
table(colData(sce10x_aged)$clusters_level1, colData(sce10x_aged)$sample)
```
```{r}
table(colData(sce10x_aged)$clusters_level2, colData(sce10x_aged)$sample)
```




```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_aged) %>%
      as_tibble(),
    sce10x_umap_aged %>%
      as_tibble()
  )
p3 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters_level1
  )) +
  geom_jitter(
    width = .1,
    height = .1,
    size = .7,
    alpha = 1
  )
p3 #+facet_wrap(~clusters_level2)
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_louvain_clusters.pdf"), p3 )
```

## Save data


```{r}
meta_dt <-
  rbind(colData(sce10x_aged), colData(sce10x_yng))
colData(sce10x) <- meta_dt
```


```{r}
saveRDS(sce10x,
        file.path(
  data_dir,
  "preprocessed",
  "sce10x_filtered_cells.rds"))
```


# Normalize and correct for batch effects

## Discard non-marked and mitotic cells


```{r}
sce10x <- sce10x[, colData(sce10x)$clusters_level1 %in% c("macrophage", "fap", "stem") ]
```

```{r}
n_exprs_genes <-
  nexprs(sce10x,
    detection_limit = 0,
    byrow = TRUE
  )
table(n_exprs_genes)[1:20]
```

```{r}
sce10x <- sce10x[n_exprs_genes >0,]
sce10x
```



```{r}
colData(sce10x)$clusters_level2 <- 
  paste(colData(sce10x)$condition, 
        colData(sce10x)$clusters_level1, sep="_")

```

## Normalize data across samples

```{r}
sce10x <-
    multiBatchNorm(sce10x,
      batch = colData(sce10x)$sample
    )
```

```{r}
n_exprs_genes <-
  nexprs(sce10x,
    detection_limit = 5,
    byrow = TRUE
  )
table(n_exprs_genes)[1:20]
```

## Run fast mutual nearest neighbors correction

```{r}
sce10x_corrected <-
  fastMNN(sce10x,
    batch = colData(sce10x)$sample,
    k = 20,
    d = 200,
    subset.row = names(n_exprs_genes)[n_exprs_genes >= 3]
  )
```

```{r}
reducedDim(sce10x, "mnn_corrected") <- reducedDim(sce10x_corrected, "corrected")
```


## Run UMAP



```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_tumap <-
  tumap(reducedDim(sce10x, 
                   "mnn_corrected"),
        metric = "cosine",
        ret_nn = TRUE)
```

```{r}
set.seed(42)
sce10x_umap <-
  umap(reducedDim(sce10x,
                  "mnn_corrected"),
    metric = "cosine",
    nn_method = sce10x_tumap$nn,
    a = .8,
    b = .55
  )
colnames(sce10x_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x, "mnn_umap") <- sce10x_umap
```

```{r fig.height=8, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x) %>%
      as_tibble(),
    sce10x_umap %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters_level1
  ),
  size = .6,
  alpha = 1
  ) #+ facet_wrap(~condition)
p2 
ggsave(file.path(figures_dir, "mnn_umap.pdf"), p2 )
```





## Cluster stem cells

```{r}
sce10x_stem <- sce10x[,colData(sce10x)$clusters_level1 =="stem"]
```



```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_stem_tumap <-
  tumap(reducedDim(sce10x_stem, 
                   "mnn_corrected"),
        metric = "cosine",
        ret_nn = TRUE)
```

```{r}
set.seed(42)
sce10x_stem_umap <-
  umap(reducedDim(sce10x_stem,
                  "mnn_corrected"),
    metric = "cosine",
    nn_method = sce10x_stem_tumap$nn,
    a = .85,
    b = .43
  )
colnames(sce10x_stem_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_stem, "mnn_umap") <- sce10x_stem_umap
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_stem) %>%
      as_tibble(),
    sce10x_stem_umap %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters_level2
  ),
  size = 1,
  alpha =.6
  )
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
```



```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_stem, k = 100, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
```

```{r}
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
```

```{r}
igraph_clusters <- igraph::cut_at(clusters_out, n = 5)
table(igraph_clusters)
```


```{r}
discard_cell <- sce10x_stem_umap[,1] < -10 | sce10x_stem_umap[,1] > 22  | (igraph_clusters ==2 & colData(sce10x_stem)$condition=="aged")
table(discard_cell)
```



```{r}
igraph_clusters[discard_cell] <- 6
```


```{r}
cluster_by_marker <-
  table(igraph_clusters, colData(sce10x_stem)$sample)
cluster_by_marker
```



```{r}
colData(sce10x_stem)$clusters <- factor(igraph_clusters)
```

```{r}
sce10x_stem_subset <-
  sce10x_stem[, colData(sce10x_stem)$clusters!=6]
```


```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_stem_subset) %>%
      as_tibble(),
    reducedDim(sce10x_stem_subset, "mnn_umap") %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters
  ),
  size = 1.7,
  alpha = 1
  )
p2  # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_stem.pdf"), p2 )
```







## Cluster fap cells

```{r}
sce10x_fap <- sce10x[,colData(sce10x)$clusters_level1 =="fap"]
```



```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_fap_tumap <-
  tumap(reducedDim(sce10x_fap, 
                   "mnn_corrected"),
        metric = "cosine",
        ret_nn = TRUE)
```

```{r}
set.seed(42)
sce10x_fap_umap <-
  umap(reducedDim(sce10x_fap,
                  "mnn_corrected"),
    metric = "cosine",
    nn_method = sce10x_fap_tumap$nn,
    a = .9,
    b = .75
  )
colnames(sce10x_fap_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_fap, "mnn_umap") <- sce10x_fap_umap
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_fap) %>%
      as_tibble(),
    sce10x_fap_umap %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters_level2
  ),
  size = 1,
  alpha =.6
  )
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
```



```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_fap, k = 100, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
```

```{r}
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
```

```{r}
igraph_clusters <- igraph::cut_at(clusters_out, n = 3)
table(igraph_clusters)
```


```{r}
discard_cell <-  sce10x_fap_umap[,1] > 5
table(discard_cell)
```



```{r}
igraph_clusters[discard_cell] <- 4
```


```{r}
cluster_by_marker <-
  table(igraph_clusters, colData(sce10x_fap)$sample)
cluster_by_marker
```



```{r}
colData(sce10x_fap)$clusters <- factor(igraph_clusters)
```

```{r}
sce10x_fap_subset <-
  sce10x_fap[, colData(sce10x_fap)$clusters!=4]
```


```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_fap_subset) %>%
      as_tibble(),
    reducedDim(sce10x_fap_subset, "mnn_umap") %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters
  ),
  size = 1.7,
  alpha = 1
  )
p2  # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_fap.pdf"), p2 )
```



## Cluster macrophage cells

```{r}
sce10x_macrophage <- sce10x[,colData(sce10x)$clusters_level1 =="macrophage"]
```



```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_macrophage_tumap <-
  tumap(reducedDim(sce10x_macrophage, 
                   "mnn_corrected"),
        metric = "cosine",
        ret_nn = TRUE)
```

```{r}
set.seed(42)
sce10x_macrophage_umap <-
  umap(reducedDim(sce10x_macrophage,
                  "mnn_corrected"),
    metric = "cosine",
    nn_method = sce10x_macrophage_tumap$nn,
    a = 0.75,
    b = 0.65
  )
colnames(sce10x_macrophage_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_macrophage, "mnn_umap") <- sce10x_macrophage_umap
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_macrophage) %>%
      as_tibble(),
    sce10x_macrophage_umap %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters_level2
  ),
  size = 1,
  alpha =.6
  )
p2
# ggsave(file.path(figures_dir, "umap_cosine_a160_b33_17928_cells_markers.pdf"), p2 )
```



```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_macrophage, k = 50, use.dimred = "mnn_umap")
clusters_out <- igraph::cluster_walktrap(g)
```

```{r}
igraph_clusters <- factor(clusters_out$membership)
table(igraph_clusters)
```

```{r}
igraph_clusters <- igraph::cut_at(clusters_out, n = 5)
table(igraph_clusters)
```


```{r}
cluster_by_marker <-
  table(igraph_clusters, colData(sce10x_macrophage)$sample)
cluster_by_marker
```



```{r}
colData(sce10x_macrophage)$clusters <- factor(igraph_clusters)
```

```{r}
sce10x_macrophage_subset <-
  sce10x_macrophage[, colData(sce10x_macrophage)$clusters %in% c(1,2, 3)]
```


```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_macrophage_subset) %>%
      as_tibble(),
    reducedDim(sce10x_macrophage_subset, "mnn_umap") %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters
  ),
  size = 1.7,
  alpha = 1
  )
p2  # + facet_wrap(~clusters)
ggsave(file.path(figures_dir, "mnn_umap_macrophage.pdf"), p2 )
```



## Save data


```{r}
meta_dt <-
  rbind(colData(sce10x_stem_subset), colData(sce10x_fap_subset), colData(sce10x_macrophage_subset)) %>%
  as_tibble(rownames = "cell") %>%
  mutate(cluster_level1_1= paste(clusters_level1, clusters, sep="_" ),
         cluster_level2_1= paste(clusters_level2, clusters, sep="_" )) %>%
  select(cell, cluster_level1_1, cluster_level2_1 )
meta_dt
```

```{r}
colData(sce10x) <-
  left_join(colData(sce10x)%>%
  as_tibble(rownames = "cell"),
  meta_dt,
  by="cell") %>%
  replace_na(list(cluster_level2_1 = "discard", cluster_level1_1 = "discard")) %>%
  select(-clusters) %>%
  column_to_rownames("cell") %>%
  DataFrame(.)

``` 

```{r}
discard_idx <- 
  colData(sce10x)$cluster_level2_1 == "discard"
table(discard_idx)
```


```{r}
sce10x <- sce10x[, !discard_idx]
```

```{r}
n_exprs_genes <-
  nexprs(sce10x,
    detection_limit = 0,
    byrow = TRUE
  )
table(n_exprs_genes)[1:20]
```

```{r}
sce10x <- sce10x[n_exprs_genes>0, ]
```



```{r}
saveRDS(
  sce10x,
  file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_final.rds"
  )
)
```

## make plots

```{r fig.height=8, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x) %>%
      as_tibble(),
    reducedDim(sce10x, "mnn_umap") %>%
      as_tibble()
  )
p2 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = cluster_level1_1
  ),
  size = .6,
  alpha = 1
  ) + facet_wrap(~condition)
p2 
ggsave(file.path(figures_dir, "mnn_umap_split.pdf"), p2 )
```

```{r}
sessionInfo()
```
