Prepare analysis workflow

Set filepaths and parameters

set.seed(42)
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
options(
  readr.show_progress = FALSE,
  digits = 2
)

Load packages

suppressPackageStartupMessages({
  library(scater)
  library(scran)
  library(tidyverse)
  library(uwot) # For umap
})

Define file paths

data_dir <- "./data"
figures_dir <- file.path("./figures")

Load data

sce10x <-
  readRDS(file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_final.rds"
  ))
table(colData(sce10x)$celltype, colData(sce10x)$sample)
            
             yng1 yng2 yng3 aged1 aged2 aged3 aged4
  fap         498  390  637  2020  2009  1568  1027
  macrophage  282  274  398    58   245   149   189
  stem       1199 1913 1049   521   682   326   739

Lognormalize spliced and spliced using common size factors

sce10x <-
  logNormCounts(sce10x,
    size_factors = sizeFactors(sce10x),
    exprs_values = "spliced",
    name = "spliced_logcounts",
    center_size_factors = FALSE
  )

sce10x <-
  logNormCounts(sce10x,
    size_factors = sizeFactors(sce10x),
    exprs_values = "unspliced",
    name = "unspliced_logcounts",
    center_size_factors = FALSE
  )

UMAP

Subset data

sce10x_stem <- sce10x[, colData(sce10x)$celltype == "stem"]
sce10x_fap <- sce10x[, colData(sce10x)$celltype == "fap"]
sce10x_macrophage <- sce10x[, colData(sce10x)$celltype == "macrophage"]

Run on stem

set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_stem_tumap <-
  tumap(reducedDim(
    sce10x_stem,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
set.seed(42)
sce10x_stem_umap <-
  umap(reducedDim(
    sce10x_stem,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_stem_tumap$nn,
  a = .7,
  b = .52
  )
colnames(sce10x_stem_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_stem, "mnn_umap") <- sce10x_stem_umap
set.seed(42)
g <- buildSNNGraph(sce10x_stem, k = 200, use.dimred = "mnn_umap")

clusters_out <- igraph::cluster_walktrap(g)
table(clusters_out$membership)

   1    2    3    4    5    6    7    8    9 
 963 1132  853  817  798  415  367  480  604 
stem_clusters <- igraph::cut_at(clusters_out, n = 3)
stem_clusters <- paste("stem", stem_clusters, sep = "_")
colData(sce10x_stem)$clusters <- stem_clusters
table(stem_clusters)
stem_clusters
stem_1 stem_2 stem_3 
  2993   2832    604 
dt <-
  bind_cols(
    colData(sce10x_stem) %>%
      as_tibble(),
    sce10x_stem_umap %>%
      as_tibble()
  )
p1 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters
  )) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p1

ggsave(file.path(figures_dir, "umap_mnn_stem.pdf"), p1)
Saving 14 x 10 in image
p2 <- ggplot(dt, aes(umap1,
  umap2,
  colour = clusters
)) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic() +
  facet_wrap(~condition, ncol = 1)
p2

ggsave(file.path(figures_dir,
                 "umap_stem_clusters_condition.pdf"), p2)
Saving 14 x 10 in image
p3 <- ggplot(dt, aes(umap1,
  umap2,
  colour = condition
)) +
  geom_point(size = 1, alpha = 0.7) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p3

ggsave(file.path(figures_dir,
                 "umap_stem_clusters_condition_color.pdf"), p3)
Saving 14 x 10 in image

Run on fap

set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_fap_tumap <-
  tumap(reducedDim(
    sce10x_fap,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
set.seed(42)
sce10x_fap_umap <-
  umap(reducedDim(
    sce10x_fap,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_fap_tumap$nn,
  a = .95,
  b = .70
  )
colnames(sce10x_fap_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_fap, "mnn_umap") <- sce10x_fap_umap
set.seed(42)
g <- buildSNNGraph(sce10x_fap, k = 100, use.dimred = "mnn_umap")

clusters_out <- igraph::cluster_walktrap(g)
table(clusters_out$membership)

   1    2    3    4    5    6    7    8    9   10   11   12   13 
 718 1085  751 1002  659  880  752  479  686  399  369  155  214 
fap_clusters <- igraph::cut_at(clusters_out, n = 2)
fap_clusters <- paste("fap", fap_clusters, sep = "_")
colData(sce10x_fap)$clusters <- fap_clusters
table(fap_clusters)
fap_clusters
fap_1 fap_2 
 6150  1999 
dt <-
  bind_cols(
    colData(sce10x_fap) %>%
      as_tibble(),
    sce10x_fap_umap %>%
      as_tibble()
  )

p4 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters
  )) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p4

ggsave(file.path(figures_dir,
                 "umap_mnn_fap.pdf"), p4)
Saving 14 x 10 in image
p5 <- ggplot(dt, aes(umap1,
  umap2,
  colour = clusters
)) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic() +
  facet_wrap(~condition, ncol = 1)
p5

ggsave(file.path(figures_dir,
                 "umap_fap_clusters_condition.pdf"), p5)
Saving 14 x 10 in image

Run on macrophage

set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_macrophage_tumap <-
  tumap(reducedDim(
    sce10x_macrophage,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
set.seed(42)
sce10x_macrophage_umap <-
  umap(reducedDim(
    sce10x_macrophage,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_macrophage_tumap$nn,
  a = 1.2,
  b = 1.4
  )
colnames(sce10x_macrophage_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_macrophage, "mnn_umap") <- sce10x_macrophage_umap
dt <-
  bind_cols(
    colData(sce10x_macrophage) %>%
      as_tibble(),
    sce10x_macrophage_umap %>%
      as_tibble()
  )

p6 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters
  ),
  size = 1,
  alpha = 1
  ) +
  theme_classic() +
  facet_wrap(~condition)
p6

ggsave(file.path(figures_dir,
                 "umap_mnn_macrophage.pdf"), p6)
Saving 14 x 10 in image

Combine and save

sce10x <-
  cbind(
    sce10x_stem,
    sce10x_fap,
    sce10x_macrophage
  )
colData(sce10x)$celltype_condition <-
  paste(colData(sce10x)$cell_type, colData(sce10x)$condition, sep = "_")
colData(sce10x)$condition <- factor(colData(sce10x)$condition,
                                    levels = c("yng", "aged"))
colData(sce10x)$sample <- factor(colData(sce10x)$sample, 
                                 levels = c("yng1", "yng2", "yng3",
                                            "aged1", "aged2", "aged3", "aged4"))
colData(sce10x)$celltype_sample <- paste(colData(sce10x)$celltype, 
                                         colData(sce10x)$sample, sep = "_")
colData(sce10x)$clusters_condition <-
  paste(colData(sce10x)$clusters, colData(sce10x)$condition, sep = "_")
colData(sce10x)$clusters_sample <- paste(colData(sce10x)$clusters,
                                         colData(sce10x)$sample, sep = "_")
DF <-
  colData(sce10x) %>%
  as_tibble() %>%
  select(
    sizeFactor, sample, condition, celltype, clusters,
    celltype_condition, clusters_condition, celltype_sample, 
    clusters_sample, sum, detected, sum_unspliced, detected_unspliced, 
    unspliced_ov_spliced, unspliced_ov_total
  ) %>%
  DataFrame(.)

rownames(DF) <- rownames(colData(sce10x))
colData(sce10x) <- DF
colData(sce10x)
DataFrame with 16173 rows and 15 columns
                    sizeFactor   sample condition    celltype    clusters celltype_condition clusters_condition celltype_sample clusters_sample
                     <numeric> <factor>  <factor> <character> <character>        <character>        <character>     <character>     <character>
CTACAGAGTGGCCCAT_d1   1.085628    aged1      aged        stem      stem_1          stem_aged        stem_1_aged      stem_aged1    stem_1_aged1
CTAACCCCACACCTGG_d1   0.898127    aged1      aged        stem      stem_1          stem_aged        stem_1_aged      stem_aged1    stem_1_aged1
AGGAGGTCACAGGATG_d1   0.854388    aged1      aged        stem      stem_1          stem_aged        stem_1_aged      stem_aged1    stem_1_aged1
GTGGCGTTCGCTCCTA_d1   0.908817    aged1      aged        stem      stem_1          stem_aged        stem_1_aged      stem_aged1    stem_1_aged1
GAACTGTGTAGACAAT_d1   0.861042    aged1      aged        stem      stem_1          stem_aged        stem_1_aged      stem_aged1    stem_1_aged1
...                        ...      ...       ...         ...         ...                ...                ...             ...             ...
CACTGTCGTGTCCATA_g3   0.610536     yng3       yng  macrophage  macrophage     macrophage_yng     macrophage_yng macrophage_yng3 macrophage_yng3
CTGTGAATCTCCATAT_g3   0.571393     yng3       yng  macrophage  macrophage     macrophage_yng     macrophage_yng macrophage_yng3 macrophage_yng3
GTCAGCGAGCGCCTCA_g3   0.436667     yng3       yng  macrophage  macrophage     macrophage_yng     macrophage_yng macrophage_yng3 macrophage_yng3
GTCGTTCAGCCATTCA_g3   0.471375     yng3       yng  macrophage  macrophage     macrophage_yng     macrophage_yng macrophage_yng3 macrophage_yng3
GATCGTATCGCGTCGA_g3   0.687047     yng3       yng  macrophage  macrophage     macrophage_yng     macrophage_yng macrophage_yng3 macrophage_yng3
                          sum  detected sum_unspliced detected_unspliced unspliced_ov_spliced unspliced_ov_total
                    <numeric> <integer>     <numeric>          <integer>            <numeric>          <numeric>
CTACAGAGTGGCCCAT_d1      9953      3042       2363.11               1213             0.311349           0.237427
CTAACCCCACACCTGG_d1      8234      2954       2887.40               1334             0.540044           0.350668
AGGAGGTCACAGGATG_d1      7833      2553       2068.85               1054             0.358918           0.264120
GTGGCGTTCGCTCCTA_d1      8332      2708       2045.49               1072             0.325378           0.245498
GAACTGTGTAGACAAT_d1      7894      3064       2802.43               1452             0.550405           0.355007
...                       ...       ...           ...                ...                  ...                ...
CACTGTCGTGTCCATA_g3      5506      2053      1196.860                738             0.277749           0.217374
CTGTGAATCTCCATAT_g3      5153      2268      1441.417                810             0.388356           0.279724
GTCAGCGAGCGCCTCA_g3      3938      1869      1178.239                731             0.426935           0.299197
GTCGTTCAGCCATTCA_g3      4251      2148       816.238                570             0.237640           0.192011
GATCGTATCGCGTCGA_g3      6196      2546      1429.636                878             0.299943           0.230735
table(colData(sce10x)$clusters, colData(sce10x)$sample)
            
             yng1 yng2 yng3 aged1 aged2 aged3 aged4
  fap_1       295  284  389  1630  1585  1134   833
  fap_2       203  106  248   390   424   434   194
  macrophage  282  274  398    58   245   149   189
  stem_1      397  661  541   370   404   185   435
  stem_2      549 1030  379   151   278   141   304
  stem_3      253  222  129     0     0     0     0
saveRDS(
  sce10x,
  file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_final.rds"
  )
)
sessionInfo()
---
title: "Mouse Muscle Stem Cell Project "
subtitle: "Part 3b: cluster celltypes"
author: 
- name: Rick Farouni
  affiliation:
  - &cruk Génome Québec Innovation Centre, McGill University, Montreal, Canada
date: '`r format(Sys.Date(), "%Y-%B-%d")`'
output:
  html_notebook:
    df_print: paged
    code_folding: show
    toc: no
    toc_float: 
      collapsed: false
      smooth_scroll: false
---



# Prepare analysis workflow

## Set filepaths and parameters

```{r setup}
set.seed(42)
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
options(
  readr.show_progress = FALSE,
  digits = 2
)
```

## Load packages
```{r}
suppressPackageStartupMessages({
  library(scater)
  library(scran)
  library(tidyverse)
  library(uwot) # For umap
})
```

## Define file paths

```{r}
data_dir <- "./data"
figures_dir <- file.path("./figures")
```



## Load data
```{r}
sce10x <-
  readRDS(file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_final.rds"
  ))
```


```{r}
table(colData(sce10x)$celltype, colData(sce10x)$sample)
```




## Lognormalize spliced and spliced using common size factors

```{r}
sce10x <-
  logNormCounts(sce10x,
    size_factors = sizeFactors(sce10x),
    exprs_values = "spliced",
    name = "spliced_logcounts",
    center_size_factors = FALSE
  )

sce10x <-
  logNormCounts(sce10x,
    size_factors = sizeFactors(sce10x),
    exprs_values = "unspliced",
    name = "unspliced_logcounts",
    center_size_factors = FALSE
  )
```



# UMAP

## Subset data

```{r}
sce10x_stem <- sce10x[, colData(sce10x)$celltype == "stem"]
sce10x_fap <- sce10x[, colData(sce10x)$celltype == "fap"]
sce10x_macrophage <- sce10x[, colData(sce10x)$celltype == "macrophage"]
```

## Run on stem 

```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_stem_tumap <-
  tumap(reducedDim(
    sce10x_stem,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
```

```{r}
set.seed(42)
sce10x_stem_umap <-
  umap(reducedDim(
    sce10x_stem,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_stem_tumap$nn,
  a = .7,
  b = .52
  )
colnames(sce10x_stem_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_stem, "mnn_umap") <- sce10x_stem_umap
```


```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_stem, k = 200, use.dimred = "mnn_umap")

clusters_out <- igraph::cluster_walktrap(g)
table(clusters_out$membership)
```


```{r}
stem_clusters <- igraph::cut_at(clusters_out, n = 3)
stem_clusters <- paste("stem", stem_clusters, sep = "_")
colData(sce10x_stem)$clusters <- stem_clusters
table(stem_clusters)
```


```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_stem) %>%
      as_tibble(),
    sce10x_stem_umap %>%
      as_tibble()
  )
p1 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters
  )) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p1
ggsave(file.path(figures_dir, "umap_mnn_stem.pdf"), p1)
```



```{r fig.height=10, fig.width=14}
p2 <- ggplot(dt, aes(umap1,
  umap2,
  colour = clusters
)) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic() +
  facet_wrap(~condition, ncol = 1)
p2
ggsave(file.path(figures_dir,
                 "umap_stem_clusters_condition.pdf"), p2)
```


```{r fig.height=10, fig.width=14}
p3 <- ggplot(dt, aes(umap1,
  umap2,
  colour = condition
)) +
  geom_point(size = 1, alpha = 0.7) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p3
ggsave(file.path(figures_dir,
                 "umap_stem_clusters_condition_color.pdf"), p3)
```


## Run on fap


```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_fap_tumap <-
  tumap(reducedDim(
    sce10x_fap,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
```

```{r}
set.seed(42)
sce10x_fap_umap <-
  umap(reducedDim(
    sce10x_fap,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_fap_tumap$nn,
  a = .95,
  b = .70
  )
colnames(sce10x_fap_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_fap, "mnn_umap") <- sce10x_fap_umap
```

```{r}
set.seed(42)
g <- buildSNNGraph(sce10x_fap, k = 100, use.dimred = "mnn_umap")

clusters_out <- igraph::cluster_walktrap(g)
table(clusters_out$membership)
```


```{r}
fap_clusters <- igraph::cut_at(clusters_out, n = 2)
fap_clusters <- paste("fap", fap_clusters, sep = "_")
colData(sce10x_fap)$clusters <- fap_clusters
table(fap_clusters)
```


```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_fap) %>%
      as_tibble(),
    sce10x_fap_umap %>%
      as_tibble()
  )

p4 <-
  ggplot(dt, aes(umap1,
    umap2,
    colour = clusters
  )) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic()
p4
ggsave(file.path(figures_dir,
                 "umap_mnn_fap.pdf"), p4)
```


```{r fig.height=10, fig.width=14}
p5 <- ggplot(dt, aes(umap1,
  umap2,
  colour = clusters
)) +
  geom_point(size = 1) +
  scale_colour_viridis_d(option = "plasma") +
  theme_classic() +
  facet_wrap(~condition, ncol = 1)
p5
ggsave(file.path(figures_dir,
                 "umap_fap_clusters_condition.pdf"), p5)
```

## Run on macrophage

```{r}
set.seed(42)
# t-UMAP is equivalent to a = 1, b = 1
# get the nearest neighbor data back
sce10x_macrophage_tumap <-
  tumap(reducedDim(
    sce10x_macrophage,
    "mnn_corrected"
  ),
  metric = "cosine",
  ret_nn = TRUE
  )
```

```{r}
set.seed(42)
sce10x_macrophage_umap <-
  umap(reducedDim(
    sce10x_macrophage,
    "mnn_corrected"
  ),
  metric = "cosine",
  nn_method = sce10x_macrophage_tumap$nn,
  a = 1.2,
  b = 1.4
  )
colnames(sce10x_macrophage_umap) <- paste0("umap", seq(1, 2))
reducedDim(sce10x_macrophage, "mnn_umap") <- sce10x_macrophage_umap
```

```{r fig.height=10, fig.width=14}
dt <-
  bind_cols(
    colData(sce10x_macrophage) %>%
      as_tibble(),
    sce10x_macrophage_umap %>%
      as_tibble()
  )

p6 <-
  ggplot(dt) +
  geom_point(aes(umap1,
    umap2,
    colour = clusters
  ),
  size = 1,
  alpha = 1
  ) +
  theme_classic() +
  facet_wrap(~condition)
p6
ggsave(file.path(figures_dir,
                 "umap_mnn_macrophage.pdf"), p6)
```



# Combine and save

```{r}
sce10x <-
  cbind(
    sce10x_stem,
    sce10x_fap,
    sce10x_macrophage
  )
```


```{r}
colData(sce10x)$celltype_condition <-
  paste(colData(sce10x)$celltype, colData(sce10x)$condition, sep = "_")
colData(sce10x)$condition <- factor(colData(sce10x)$condition,
                                    levels = c("yng", "aged"))
colData(sce10x)$sample <- factor(colData(sce10x)$sample, 
                                 levels = c("yng1", "yng2", "yng3",
                                            "aged1", "aged2", "aged3", "aged4"))
colData(sce10x)$celltype_sample <- paste(colData(sce10x)$celltype, 
                                         colData(sce10x)$sample, sep = "_")
colData(sce10x)$clusters_condition <-
  paste(colData(sce10x)$clusters, colData(sce10x)$condition, sep = "_")
colData(sce10x)$clusters_sample <- paste(colData(sce10x)$clusters,
                                         colData(sce10x)$sample, sep = "_")
```

```{r}
DF <-
  colData(sce10x) %>%
  as_tibble() %>%
  select(
    sizeFactor, sample, condition, celltype, clusters,
    celltype_condition, clusters_condition, celltype_sample, 
    clusters_sample, sum, detected, sum_unspliced, detected_unspliced, 
    unspliced_ov_spliced, unspliced_ov_total
  ) %>%
  DataFrame(.)

rownames(DF) <- rownames(colData(sce10x))
colData(sce10x) <- DF
```


```{r}
colData(sce10x)
```


```{r}
table(colData(sce10x)$clusters, colData(sce10x)$sample)
```


```{r}
saveRDS(
  sce10x,
  file.path(
    data_dir,
    "preprocessed",
    "sce10x_filtered_final.rds"
  )
)
```

```{r}
sessionInfo()
```
