-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathanalyzeBead.R
141 lines (124 loc) · 6.47 KB
/
analyzeBead.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Automated differential expression analysis for Illumina beadarrays (HT12 V4 arrays).
# Uses idat files as input.
# Author: Anand Mayakonda
# Usage:
# source("AnalyzeBead.R")
# topTable = beadAnalyze(idats = c("file1.idat","file2.idat","file3.idat","file4.idat"),names = c("control1","control2","treated1","treated2"),
# condition = c("control","control","treated","treated"),ref.condition = "treated", fdr = 0.05, plotPCA = T)
#
# Arguments:
# idats = vector of idat files to be analyzed
# names = sample names
# condition = condition status of each sample
# ref.condition = reference condition to be used
# fdr = fdr cutoff to annotate Differentially Expressed Genes.
# plotPCA = if TRUE performs PCA and plots first two PCs
#
# returns toptable (all genes with expression and annotation, ordered according to FDR)
##########################################################################################################
#Required packages
pkgs = c('beadarray', 'illuminaHumanv4.db', 'limma', 'ggplot2', 'ggrepel')
sapply(X = pkgs, require, character.only = T)
#function to automate differential expression (from limmaDE in beadarray).
limma.autoDE = function (summaryData, SampleGroup, DesignMatrix = NULL, makeWts = TRUE, ...)
{
if (is.null(SampleGroup))
stop("You must define a SampleGroup for the differential expression\n")
if (SampleGroup %in% colnames(pData(summaryData)))
SampleGroup <- pData(summaryData)[, SampleGroup]
else {
print(paste(colnames(pData(summaryData)), collapse = " "))
stop("The SampleGroup argument must be a column name in the phenoData slot. See above for list of valid strings")
}
design <- model.matrix(~0 + as.factor(SampleGroup))
colnames(design) <- as.character(levels(as.factor(SampleGroup)))
contrast <- vector()
for (a in 1:length(levels(as.factor(SampleGroup)))) {
for (b in 1:length(levels(as.factor(SampleGroup)))) {
if (a != b) {
if (a < b) {
contrast[length(contrast) + 1] <- paste(levels(as.factor(SampleGroup))[a],
levels(as.factor(SampleGroup))[b], sep = "-")
}
}
}
}
if (makeWts) {
wts <- arrayWeights(exprs(summaryData), design)
message("Calculating array weights")
message("Array weights")
wts
fit <- lmFit(exprs(summaryData), design, weights = wts)
}
else fit <- lmFit(exprs(summaryData), design)
cnt <- paste(colnames(design)[1], colnames(design)[2], sep = "-")
cMat <- makeContrasts(contrasts = contrast, levels = design)
fit2 <- contrasts.fit(fit, cMat)
efit <- eBayes(fit2)
# deResults <- new("limmaResults", LogFC = efit$coef, PValue = efit$p.value,
# LogOdds = efit$lods, featureData = summaryData@featureData,
# phenoData = summaryData@phenoData)
#de.df = topTable(fit2, coef=1, adjust="BH",number = 'all')
#de.df = data.frame(LogFC = efit$coef, PValue = efit$p.value, LogOdds = efit$lods)
return(efit)
}
#function to perform and plot PCA results
pca_plot = function(mat, pdata, label = F){
mat.prcomp = prcomp(x = t(mat))
pca.dat = mat.prcomp$x[,1:2]
pca.dat = cbind(pca.dat, pdata[rownames(pca.dat),])
if(label){
p = ggplot(data = pca.dat, aes(x = PC1, y = PC2, color = sampleFac, label = sectionNames))+geom_point()+geom_text_repel()+theme(legend.position = 'bottom')
}else{
p = ggplot(data = pca.dat, aes(x = PC1, y = PC2, color = sampleFac))+geom_point()+theme(legend.position = 'bottom')
}
print(p)
}
#main function to run.
beadAnalyze = function(idats, names, condition, ref.condition, fdr = 0.05, plotPCA = F){
bead.batch = readIdatFiles(idatFiles = idats) #read idat files
pData(bead.batch)[,1] = names #change default name to user provided names
rownames(pData(bead.batch)) = names
pData(bead.batch)[,"sampleFac"] = condition #add conditions
pData(bead.batch)[,"sampleFac"] = as.factor(pData(bead.batch)[,"sampleFac"]) #change into factors
pData(bead.batch)[,2] = relevel(pData(bead.batch)[,2],ref = ref.condition) #relevel the factors
#normalize (available options: options are "quantile", "qspline", "vsn", "rankInvariant", "median" and "none"). Change accordingly if necessary.
bead.eset = normaliseIllumina(BSData = bead.batch,method = "quantile",transform = "log2")
eset.exprs = as.data.frame(exprs(bead.eset)) #get exprs
names(eset.exprs) = names
is.na(eset.exprs) = do.call(cbind,lapply(eset.exprs, is.infinite)) #remove inf values if any.
eset.exprs = as.matrix(eset.exprs[complete.cases(eset.exprs),])
phenoData = new(Class = 'AnnotatedDataFrame',data = pData(bead.batch)) # create new pData
eset = ExpressionSet(assayData = as.matrix(eset.exprs),phenoData = phenoData,annotation = 'Humanv4') #create new expressionSet object
eset = addFeatureData(eset,toAdd = c("SYMBOL", "PROBEQUALITY", "CODINGZONE", "PROBESEQUENCE", "GENOMICLOCATION")) #add other features from IlluminaV4 pacakge.
exprs.df = cbind(exprs(eset),as(eset@featureData,Class = 'data.frame'))
exprs.df = exprs.df[,-grep(pattern = 'Row.names',x = colnames(exprs.df))]
if(length(pData(eset)[,1]) == 2){
cat('no replicates available; can not perform differntial expression analysis \n returning just expression table')
return(exprs.df)
} else{
eset.de = limma.autoDE(summaryData = eset,SampleGroup = 'sampleFac') #run differnetial analysis.
tt = topTable(eset.de,number = 'all') #get toptable.
res = merge(exprs.df,tt,by = 'row.names') #merge everything.
res = res[order(res$adj.P.Val),] #order according to fdr.
rownames(res) = res$Row.names
res = res[,-1]
#get up and down probes
downProbes = rownames(res[which(res$adj.P.Val < fdr & res$logFC < 0),])
upProbes = rownames(res[which(res$adj.P.Val < fdr & res$logFC > 0),])
#annotate deg status
res$down = as.integer(rownames(res) %in% downProbes)
res$up = as.integer(rownames(res) %in% upProbes)
res$up = factor(x = res$up, levels = c(1,0), labels = c('up', 'none'))
res$down = factor(x = res$down, levels = c(1,0), labels = c('down', 'none'))
res$deg = interaction(res$up, res$down)
res$deg = as.character(factor(x = res$deg, levels = c("up.down" ,"none.down","up.none" ,"none.none"), labels = c('none', 'down', 'up', 'none')))
res = subset(res, select = -c(up, down))
#Perform Principal Component Analysis (PCA) and plot first two Principal Components
if(plotPCA){
pca_plot(mat = res[,names], pdata = pData(eset), label = T)
}
#return(list(results = res, eset = eset))
return(res)
}
}