2023/0727_使用 Seurat的2700 PBMC資料_了解scRNA-seq 分析流程

Source: https://satijalab.org/seurat/articles/pbmc3k_tutorial.html # Load the PBMC dataset (資料輸入) pbmc.data <- Read10X(data.dir = "D:/YCT/scRNAseq/pbmc3k/filtered_gene_bc_matrices/hg19/") # Initialize the Seurat object with the raw (non-normalized data). pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200) #Standard pre-processing workflow # The [[ operator can add columns to object metadata. This is a great place to stash QC stats pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") # Visualize QC metrics as a violin plot VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) # FeatureScatter is typically used to visualize feature-feature relationships, but can be used # for anything calculated by the object, i.e. columns in object metadata, PC scores etc. plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt") plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") plot1 + plot2 #選取一部份的 細胞做後續分析.資料存入另一個pbmc1 pbmc1 <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) #Normalizing the data pbmc1 <- NormalizeData(pbmc1, normalization.method = "LogNormalize", scale.factor = 10000) #feature selection/ Identification of highly variable features pbmc1 <- FindVariableFeatures(pbmc1, selection.method = "vst", nfeatures = 2000) # Identify the 10 most highly variable genes top10 <- head(VariableFeatures(pbmc1), 10) # plot variable features with and without labels plot1 <- VariableFeaturePlot(pbmc1) plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) plot1 + plot2 #Scaling the data all.genes <- rownames(pbmc1) pbmc1 <- ScaleData(pbmc1, features = all.genes) #Perform linear dimensional reduction pbmc1 <- RunPCA(pbmc1, features = VariableFeatures(object = pbmc1)) # Examine and visualize PCA results a few different ways print(pbmc1[["pca"]], dims = 1:5, nfeatures = 5) #Seurat provides several useful ways of visualizing both cells and features that define the PCA, including VizDimReduction(), DimPlot(), and DimHeatmap() VizDimLoadings(pbmc1, dims = 1:2, reduction = "pca") DimPlot(pbmc1, reduction = "pca") DimHeatmap(pbmc1, dims = 1, cells = 500, balanced = TRUE) DimHeatmap(pbmc1, dims = 1:15, cells = 500, balanced = TRUE) #Determine the ‘dimensionality’ of the dataset pbmc1 <- JackStraw(pbmc1, num.replicate = 100) pbmc1 <- ScoreJackStraw(pbmc1, dims = 1:20) JackStrawPlot(pbmc1, dims = 1:15) ElbowPlot(pbmc1) #Cluster the cells pbmc1 <- FindNeighbors(pbmc1, dims = 1:10) pbmc1 <- FindClusters(pbmc1, resolution = 0.5) #Maximum modularity in 10 random starts: 0.8728 # Look at cluster IDs of the first 5 cells head(Idents(pbmc1), 5) #Run non-linear dimensional reduction (UMAP/tSNE) pbmc1 <- RunUMAP(pbmc1, dims = 1:10) # note that you can set `label = TRUE` or use the LabelClusters function to help label # individual clusters DimPlot(pbmc1, reduction = "umap") #暫時存下這個階段的資料, saveRDS(pbmc1, file="D:/YCT/scRNAseq/pbmc3k/demo1.rds") #Cluster biomarkers/把每一個cluster的特徵基因找出來。 #特定某一個群體2/ find all markers of cluster 2 cluster2.markers <- FindMarkers(pbmc1, ident.1 = 2, min.pct = 0.25) head(cluster2.markers, n = 5) # 特定某一個群體5 與 group0, 3的不同/ find all markers distinguishing cluster 5 from clusters 0 and 3 cluster5.markers <- FindMarkers(pbmc1, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25) head(cluster5.markers, n = 5) #主要使用的功能/把每一個cluster的特徵基因找出來。/find markers for every cluster compared to all remaining cells, report only the positive ones pbmc1.markers <- FindAllMarkers(pbmc1, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) pbmc1.markers %>% group_by(cluster) %>% slice_max(n = 2, order_by = avg_log2FC) #ROC test returns the ‘classification power’ for any individual marker (ranging from 0 - random, to 1 - perfect). cluster0.markers <- FindMarkers(pbmc1, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE) VlnPlot(pbmc1, features = c("MS4A1", "CD79A")) # you can plot raw counts as well VlnPlot(pbmc1, features = c("NKG7", "PF4"), slot = "counts", log = TRUE) FeaturePlot(pbmc1, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A")) #將每個cluster內前幾名特別基因用來分析heatmap pbmc1.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(pbmc1, features = top10$gene) + NoLegend() #cluster 命名 new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") names(new.cluster.ids) <- levels(pbmc1) pbmc1 <- RenameIdents(pbmc1, new.cluster.ids) DimPlot(pbmc1, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() #暫時存下這個階段的資料, saveRDS(pbmc1, file= "D:/YCT/scRNAseq/pbmc3k/demo2.rds")

Comments

Popular posts from this blog

2022_1204_下載FPKM到TPM

2023_0730/ 整合 snRNA-seq的資料, 去除Batch effect /Bioinformagician