1
+ # #### determine differentially expressed genes using MAST #####
2
+ # author: Maria Tosches
3
+ # edited by: Juliska E Boer
4
+ # date: 03 Nov 2020
5
+
6
#' Union of differentially expressed genes over all pairs of clusters
#'
#' Calls `FindMarkers.MAST()` once for every unordered pair of `clusters`,
#' keeps each pairwise result table, and accumulates the union of gene names
#' (row names of the result tables) at four FDR cutoffs.
#'
#' @param SeuratObject Object handed unchanged to `FindMarkers.MAST()`.
#' @param clusters Vector of at least two cluster identities; coerced to
#'   character before the pairs are built.
#' @param min.pct Passed on to `FindMarkers.MAST()`.
#' @param thresh.use Passed on to `FindMarkers.MAST()`.
#' @param data.info.col Passed on to `FindMarkers.MAST()`.
#' @return A list whose first four elements are character vectors of gene
#'   names (`union_fdr<0.05`, `union_fdr<0.01`, `union_fdr<0.005`,
#'   `union_fdr<1e-5`), followed by one element per pair named
#'   `cluster_<a>_cluster_<b>` holding the table returned by
#'   `FindMarkers.MAST()`, then `nDEgenes` (pairs x cutoffs matrix of gene
#'   counts) and `nDEgene_union` (size of each union).
DE_Gene_Union <- function(SeuratObject, clusters, min.pct = 0.4,
                          thresh.use = log(2), data.info.col) {
  # One entry per reported FDR tier; the names double as output labels.
  fdr_cutoffs <- c(
    "union_fdr<0.05" = 0.05,
    "union_fdr<0.01" = 0.01,
    "union_fdr<0.005" = 0.005,
    "union_fdr<1e-5" = 1e-5
  )

  clusters <- as.character(clusters)
  if (length(clusters) < 2L) {
    stop("`clusters` must contain at least two cluster identities.",
         call. = FALSE)
  }

  DEgenes <- lapply(fdr_cutoffs, function(cutoff) character())

  combinations <- t(combn(clusters, 2))
  colnames(combinations) <- c("ident.1", "ident.2")
  n_pairs <- nrow(combinations)
  pair_names <- paste("cluster", combinations[, 1],
                      "cluster", combinations[, 2], sep = "_")

  nDEgenes <- matrix(
    NA_integer_,
    nrow = n_pairs,
    ncol = length(fdr_cutoffs),
    dimnames = list(pair_names, names(fdr_cutoffs))
  )

  pb <- txtProgressBar(min = 0, max = n_pairs, style = 3)
  # Close the bar even if one of the pairwise tests fails.
  on.exit(close(pb), add = TRUE)

  for (i in seq_len(n_pairs)) {
    a <- combinations[i, 1]
    b <- combinations[i, 2]
    diff.genes <- suppressMessages(
      FindMarkers.MAST(SeuratObject, a, b, min.pct, thresh.use, data.info.col)
    )
    DEgenes[[pair_names[i]]] <- diff.genes

    # FindMarkers.MAST() only returns rows with fdr < 0.05, so the first
    # tier keeps every row; the stricter tiers are subsets of it.
    for (k in seq_along(fdr_cutoffs)) {
      hits <- rownames(diff.genes)[which(diff.genes$fdr < fdr_cutoffs[[k]])]
      DEgenes[[k]] <- union(DEgenes[[k]], hits)
      nDEgenes[i, k] <- length(hits)
    }

    setTxtProgressBar(pb, i)
  }

  DEgenes[["nDEgenes"]] <- nDEgenes
  DEgenes[["nDEgene_union"]] <- vapply(
    DEgenes[names(fdr_cutoffs)], length, integer(1)
  )
  DEgenes
}
36
+
37
#' MAST hurdle-model test between two clusters of a Seurat object
#'
#' Compares the cells whose active identity is `id1` against those whose
#' identity is `id2`. Genes are pre-filtered on detection rate, a MAST `zlm`
#' with formula `~ cluster + cngeneson` is fitted on log2(counts + 1), and
#' genes passing both the FDR and the average-difference filters are returned.
#'
#' @param object Seurat object; the code reads `object@active.ident`,
#'   `object@meta.data`, `object@assays$RNA@counts` and
#'   `object@assays$RNA@data`.
#' @param id1 Identity of the first (reference) cluster.
#' @param id2 Identity of the second cluster; the likelihood-ratio test is
#'   run on the `cluster<id2>` coefficient.
#' @param min.pct A gene is tested only if the fraction of cells with a
#'   non-zero count exceeds this value in at least one of the two clusters.
#' @param thresh.use Minimum absolute `avg_diff` for a gene to be reported.
#' @param data.info.col Column(s) of `object@meta.data` (index or name) copied
#'   into the cell-level annotation handed to MAST. They are not part of the
#'   model formula.
#' @return A data.frame with one row per retained gene (row names are gene
#'   names) and columns `Pr`, `coef`, `fdr`, `avg_diff`, `pct.1`, `pct.2`,
#'   sorted by decreasing absolute `avg_diff`. Only genes with `fdr < 0.05`
#'   and `abs(avg_diff) > thresh.use` are kept.
FindMarkers.MAST <- function(object, id1, id2, min.pct, thresh.use,
                             data.info.col) {
  idents <- object@active.ident
  cells.1 <- names(idents)[which(idents == id1)]
  cells.2 <- names(idents)[which(idents == id2)]
  if (length(cells.1) == 0L || length(cells.2) == 0L) {
    stop("No cells found for cluster '", id1, "' and/or '", id2, "'.",
         call. = FALSE)
  }
  cells.to.compare <- c(cells.1, cells.2)

  # Densify only the cells of the two clusters being compared; nothing below
  # looks at any other cell, and the full matrix can be very large.
  genes.all <- rownames(object@assays$RNA@data)
  raw.counts <- as.matrix(
    object@assays$RNA@counts[genes.all, cells.to.compare, drop = FALSE]
  )
  log.counts <- log(raw.counts + 1) / log(2) # i.e. log2(counts + 1)

  # Fraction of cells in each cluster with a non-zero count, per gene.
  detected <- log.counts > 0
  pct.1 <- round(
    rowSums(detected[, cells.1, drop = FALSE]) / length(cells.1), 3
  )
  pct.2 <- round(
    rowSums(detected[, cells.2, drop = FALSE]) / length(cells.2), 3
  )
  data.alpha <- cbind(pct.1 = pct.1, pct.2 = pct.2)
  genes.use <- names(which(pmax(pct.1, pct.2) > min.pct))

  neur.data <- log.counts[genes.use, cells.to.compare, drop = FALSE]

  # Cell-level annotation: cluster label, requested metadata columns and the
  # `wellKey` cell identifier.
  neur.cData <- data.frame(
    cluster = as.character(idents[cells.to.compare]),
    object@meta.data[cells.to.compare, data.info.col, drop = FALSE],
    wellKey = as.character(cells.to.compare),
    row.names = cells.to.compare,
    stringsAsFactors = FALSE,
    check.names = FALSE
  )

  # Gene-level annotation: gene names serve as both primer and symbol id.
  neur.fData <- data.frame(
    primerid = genes.use,
    symbolid = genes.use,
    stringsAsFactors = FALSE
  )

  neur.to.compare <- FromMatrix(neur.data, neur.cData, neur.fData)

  # Scaled number of detected genes per cell, used as a covariate.
  colData(neur.to.compare)$cngeneson <-
    scale(colSums(assay(neur.to.compare) > 0))
  # Make id1 the reference level so the tested coefficient is "cluster<id2>".
  cluster.factor <- as.factor(colData(neur.to.compare)$cluster)
  colData(neur.to.compare)$cluster <- relevel(cluster.factor,
                                              as.character(id1))

  zlmCond <- zlm(~ cluster + cngeneson, neur.to.compare)
  lrt.term <- paste0("cluster", id2)
  summaryDt <- summary(zlmCond, doLRT = lrt.term)$datatable

  # P-values come from the "H" component rows, effect sizes from "logFC".
  set1 <- summaryDt[contrast == lrt.term & component == "H",
                    .(primerid, Pr = `Pr(>Chisq)`)]
  set2 <- summaryDt[contrast == lrt.term & component == "logFC",
                    .(primerid, coef, ci.hi, ci.lo)]

  fcHurdle <- merge(set1, set2, by = "primerid")
  fcHurdle[, fdr := p.adjust(Pr, "fdr")]

  # Mean taken after undoing the log(x + 1) transform, then re-logged.
  expMean <- function(x) {
    log(mean(exp(x) - 1) + 1)
  }

  norm.data <- as.matrix(
    object@assays$RNA@data[genes.use, cells.to.compare, drop = FALSE]
  )
  avg_diff <- apply(norm.data[, cells.1, drop = FALSE], 1, expMean) -
    apply(norm.data[, cells.2, drop = FALSE], 1, expMean)

  fcHurdle2 <- as.data.frame(fcHurdle)
  rownames(fcHurdle2) <- fcHurdle2$primerid
  fcHurdle2$avg_diff <- unname(avg_diff[fcHurdle2$primerid])

  keep <- fcHurdle2$fdr < 0.05 & abs(fcHurdle2$avg_diff) > thresh.use
  fcHurdleSig <- fcHurdle2[which(keep), , drop = FALSE]
  fcHurdleSig <- fcHurdleSig[complete.cases(fcHurdleSig), , drop = FALSE]

  # drop = FALSE keeps the detection rates a two-column matrix even when a
  # single gene survives the filters.
  fcHurdleSig <- cbind(
    fcHurdleSig[, c("Pr", "coef", "fdr", "avg_diff"), drop = FALSE],
    data.alpha[rownames(fcHurdleSig), , drop = FALSE]
  )
  fcHurdleSig[order(abs(fcHurdleSig$avg_diff), decreasing = TRUE), ,
              drop = FALSE]
}
0 commit comments