2023_0730/ 整合 snRNA-seq的資料, 去除Batch effect /Bioinformagician
Source: https://www.youtube.com/watch?v=HrbeaEJqKcY&ab_channel=Bioinformagician
Source: https://mojaveazure.github.io/seurat-object/reference/Seurat-methods.html
Title: Integrate single-cell RNA-Seq datasets in R using Seurat (CCA) | Detailed Seurat Workflow Tutorial
# script to integrate scRNA-Seq datasets to correct for batch effects
# setwd("~/Desktop/demo/single_cell_integrate") 設定程式執行時的檔案目錄
#在這次測試中我的目錄路徑# data/= D:/YCT/scRNAseq/GSE180665integrate_demo/
# load libraries
library(Seurat)
library(tidyverse) #separate()就是它的功能
library(ggplot2)
library(gridExtra)
# get data location以及建立 Seurat object
# dirs <- list.dirs(path = 'data/', recursive = F, full.names = F)
dirs <- list.dirs(path = 'D:/YCT/scRNAseq/GSE180665integrate_demo/', recursive = FALSE, full.names = FALSE)
for(x in dirs){
name <- gsub('_filtered_feature_bc_matrix','', x)
#cts <- ReadMtx(mtx = paste0('data/',x,'/matrix.mtx.gz'),
# features = paste0('data/',x,'/features.tsv.gz'),
# cells = paste0('data/',x,'/barcodes.tsv.gz'))
## ReadMtx()是Seurat的一個功能, 有特定的語法, 會得到一個matrix, 含有 features(gene name), 及不同細胞(barcodes), 一個2維的資料。
## 可以用 cts[1:10,1:10]看前10x10的矩陣的樣子,凡是可以用到 CreateSeuratObject 的檔案, 應該都是matrix長得一樣。。
cts <- ReadMtx(mtx = paste0('D:/YCT/scRNAseq/GSE180665integrate_demo/',x,'/matrix.mtx.gz'),
features = paste0('D:/YCT/scRNAseq/GSE180665integrate_demo/',x,'/features.tsv.gz'),
cells = paste0('D:/YCT/scRNAseq/GSE180665integrate_demo/',x,'/barcodes.tsv.gz'))
# create seurat objects
assign(name, CreateSeuratObject(counts = cts))
}
# merge datasets
### 2023/0728/把7個檔merge家裏RAM己經用了76%, 可以看RAM設備,先測試三個tumor/, 因為seurat project己經不是矩陣了, 要用View或tail這個功能檢查。
#merged_seurat <- merge(HB17_background, y = c(HB17_PDX, HB17_tumor, HB30_PDX, HB30_tumor, HB53_background,HB53_tumor),
# add.cell.ids = ls()[3:9], #因為merge後不同object的barcode一樣無法區分
# project = 'HB')
merged_seurat <- merge(HB17_tumor, y = c(HB30_tumor, HB53_tumor),
add.cell.ids = ls()[c(3,4,5)], #因為merge後不同object的barcode一樣無法區分
project = 'HB')
merged_seurat #看一下基本敘述
View(merged_seurat@meta.data) #會出現整個meta.data的樣子
tail(merged_seurat,n=10L) #看最後10筆。
# QC & filtering -----------------------
# create a sample column
merged_seurat$sample <- rownames(merged_seurat@meta.data)
# split sample column
merged_seurat@meta.data <- separate (merged_seurat@meta.data, col = 'sample', into = c('Patient', 'Type', 'Barcode'),
sep = '_') #之前R出現 could not find function "separate"的其怪訊息..
unique(merged_seurat@meta.data$Type) #確定meta.data裏的Type有幾種。
unique(merged_seurat@meta.data$Patient) #確定meta.data裏的Type有幾種。
# calculate mitochondrial percentage
merged_seurat$mitoPercent <- PercentageFeatureSet(merged_seurat, pattern='^MT-')
# explore QC
# filtering (使用 subset()功能把條件內的細胞選取出來
merged_seurat_filtered <- subset(merged_seurat, subset = nCount_RNA > 900 &
nFeature_RNA > 500 &
mitoPercent < 5)
merged_seurat_filtered
merged_seurat
# perform standard workflow steps to figure out if we see any batch effects --------
merged_seurat_filtered <- NormalizeData(object = merged_seurat_filtered)
merged_seurat_filtered <- FindVariableFeatures(object = merged_seurat_filtered)
merged_seurat_filtered <- ScaleData(object = merged_seurat_filtered)
merged_seurat_filtered <- RunPCA(object = merged_seurat_filtered)
ElbowPlot(merged_seurat_filtered)
merged_seurat_filtered <- FindNeighbors(object = merged_seurat_filtered, dims = 1:15)
merged_seurat_filtered <- FindClusters(object = merged_seurat_filtered)
merged_seurat_filtered <- RunUMAP(object = merged_seurat_filtered, dims = 1:15)
# plot (檢查batch effect
p1 <- DimPlot(merged_seurat_filtered, reduction = 'umap', group.by = 'Patient')
p2 <- DimPlot(merged_seurat_filtered, reduction = 'umap', group.by = 'Type',
cols = c('red','green','blue'))
grid.arrange(p1, p2, ncol = 2, nrow = 2)
#由圖表可以看出, 同樣是 tumor, 但這H17, H30, H53分佈在不同位置, 這就是 batch effect.. 不同實驗條件造成
# perform integration to correct for batch effects ------
obj.list <- SplitObject(merged_seurat_filtered, split.by = 'Patient')
for(i in 1:length(obj.list)){
obj.list[[i]] <- NormalizeData(object = obj.list[[i]])
obj.list[[i]] <- FindVariableFeatures(object = obj.list[[i]])
}
# select integration features
features <- SelectIntegrationFeatures(object.list = obj.list)
# find integration anchors (CCA)
anchors <- FindIntegrationAnchors(object.list = obj.list,
anchor.features = features)
# integrate data
seurat.integrated <- IntegrateData(anchorset = anchors)
# Scale data, run PCA and UMAP and visualize integrated data
seurat.integrated <- ScaleData(object = seurat.integrated)
seurat.integrated <- RunPCA(object = seurat.integrated)
seurat.integrated <- RunUMAP(object = seurat.integrated, dims = 1:50)
p3 <- DimPlot(seurat.integrated, reduction = 'umap', group.by = 'Patient')
p4 <- DimPlot(seurat.integrated, reduction = 'umap', group.by = 'Type',
cols = c('red','green','blue'))
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
Comments
Post a Comment