Skip to content

Commit 389f427

Browse files
authoredFeb 17, 2022
Add files via upload
1 parent 5ce0b0e commit 389f427

11 files changed

+5486
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
##### alternative integration: correlation analysis #####
2+
# author: Juliska E Boer
3+
# edidited: L.L. van de Haar
4+
# date: 21 01 2021
5+
setwd("/Volumes/HD_LLH/python/STAR_E18_wt/FINAL/Correlation")
6+
7+
#load packages
8+
library(MAST)
9+
library(Seurat)
10+
library(tidyverse)
11+
library(heatmaply)
12+
13+
#load functions for identifying differentially expressed genes (DEGs) using MAST and for calculating the correlations
14+
source("Correlation_CalcDEG.R")
15+
source("Correlation_CalcCorr.R")
16+
17+
#read in the data
18+
load("../data/output/E18WT_Scanpy_Seurat_obj.RData") #E18WT
19+
load("../data/output/final.E18Brn3a_Scanpy_Seurat_obj.RData") #E18 Brn3a
20+
21+
#determine DEGs for each dataset, and save information (this function takes some time)
22+
DEG.final.E18WT <- DE_Gene_Union(final.E18WT, levels(final.E18WT@active.ident), data.info.col = 1:9)
23+
save(DEG.final.E18WT, file = "../data/output/DEG-final-E18WT.RData")
24+
25+
DEG.final.E18Brn3a <- DE_Gene_Union(final.E18Brn3a, levels(final.E18Brn3a@active.ident), data.info.col = 1:5)
26+
save(DEG.final.E18Brn3a, file = "../data/output/DEG-final-E18Brn3a.RData")
27+
28+
#prepare input for correlation function
29+
#average expression matrices filtered on DEG
30+
31+
#load DEG result (vectors behind the objects are the cluster names, these are used for plotting)
32+
load("../data/output/DEG-final-E18WT.RData") #c("Ad_Hb01", "Ad_Hb02A", "Ad_Hb02B", "Ad_Hb04", "Ad_Hb05", "Ad_Hb06", "Ad_Hb08", "Ad_Hb09", "Ad_Hb10", "Ad_Hb11", "Ad_Hb13", "Ad_Hb14", "Ad_VHb01", "Ad_VHb02", "Ad_VHb03", "Ad_VHb04", "Ad_Hb16")
33+
load("../data/output/DEG-final-E18Brn3a.RData") #c("La_Hb01", "La_Hb02", "La_Hb03", "La_Hb04", "La_Hb05", "La_Hb06", "La_Hb07", "La_Hb08", "La_Hb09", "La_Hb10", "La_Hb11", "La_Hb12", "La_Hb13", "La_Hb14", "La_Hb15", "Olf")
34+
35+
#set the datasets and DEGs you want to correlate
36+
expr_table1 <- AverageExpression(final.E18WT, assays = "RNA", slot="counts")[[1]]
37+
expr_table2 <- AverageExpression(final.E18Brn3a, assays = "RNA", slot="counts")[[1]]
38+
DEgenes1 <- DEG.final.E18WT$`union_fdr<0.05`
39+
DEgenes2 <- DEG.final.E18Brn3a$`union_fdr<0.05`
40+
41+
DEgenesSpecies1 <- DEgenes1
42+
DEgenesSpecies2 <- DEgenes2
43+
#ExpressionTable1 <- expr_table1
44+
#ExpressionTable2 <- expr_table2
45+
46+
nDESp1 <- 2729
47+
nDESp2 <- 512
48+
49+
#execute correlation function
50+
comp.intersect <- SpPermute(expr_table1, DEgenes1, expr_table2, DEgenes2, nPermutations=1000, genes.use= "intersect",corr.method="spearman")
51+
#get correlation matrix
52+
comp_table.intersect <- t(comp.intersect[[1]][1:ncol(expr_table1),(ncol(expr_table1)+1):nrow(comp.intersect[[1]])])
53+
#change order of correlation matrix (take the commented vectors behind the DEG results)
54+
comp_table.intersect <- comp_table.intersect[c("0", "1", "6", "2", "4", "3", "5", "7", "8"),
55+
c("0", "3", "4", "1", "2")]
56+
#if the developmental Hb is correlated, then we add the "Cluster" prefix.
57+
#uncomment line below (COL = expr_table1 ~ ROW = expr_table2)
58+
rownames(comp_table.intersect) <- paste("Cluster", rownames(comp_table.intersect), sep = "_")
59+
60+
#get signficance matrix
61+
p_table.intersect <- t(comp.intersect[[3]][1:ncol(expr_table1),(ncol(expr_table1)+1):nrow(comp.intersect[[1]])])
62+
#change order of significance matrix (take the commented vectors behind the DEG results)
63+
p_table.intersect <- p_table.intersect[c("0", "1", "6", "2", "4", "3", "5", "7", "8"),
64+
c("0", "3", "4", "1", "2")]
65+
#if the developmental Hb is correlated, then we add the "Cluster" prefix.
66+
#uncomment line below (COL = expr_table1 ~ ROW = expr_table2)
67+
rownames(p_table.intersect) <- paste("Cluster", rownames(p_table.intersect), sep = "_")
68+
69+
#change minor details in significance matrix for nice plot results
70+
p_vals <- p_table.intersect
71+
p_table.intersect <- (-p_table.intersect)
72+
p_vals[p_vals>0.05] <- NA
73+
p_vals[p_vals<=0.05] <- "."
74+
75+
#plot correlations in correlation matrix ordered by hierarchical clustering
76+
cols <- colorRampPalette(c("darkblue", "white","darkred"))
77+
78+
heatmaply_cor(comp_table.intersect, cellnote=p_vals, colors = cols(200), node_type="scatter",
79+
label_names=c("Embryo", "Pandey", "Correlation"), limits = c(min(comp_table.intersect), max(comp_table.intersect)),
80+
cellnote_color="black", cellnote_size=8, cellnote_textposition="middle center", point_size_mat=p_table.intersect,
81+
width = 20, height = 8, point_size_name="p-value")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
##### determine correlations and signficance #####
2+
# author: Maria Tosches
3+
# edited by: Juliska E Boer
4+
# date: 03 Nov 2020
5+
6+
SpPermute = function(ExpressionTable1, DEgenes1, ExpressionTable2, DEgenes2, nPermutations = 1000, genes.use='intersect', corr.method = 'spearman'){
7+
8+
#Step1: Take intersect of DEgenes for analysis
9+
DEgenes = intersect(DEgenesSpecies1,DEgenesSpecies2)
10+
11+
#Step2: Prune Expression Tables & Remove rows with no expression
12+
Sp1 = ExpressionTable1[rownames(ExpressionTable1) %in% DEgenes1,]
13+
Sp1 = Sp1[rowSums (Sp1)!=0,]
14+
Sp2 = ExpressionTable2[rownames(ExpressionTable2) %in% DEgenes2,]
15+
Sp2 = Sp2[rowSums (Sp2)!=0,]
16+
17+
#Step3: Scale Expression Tables by gene expression sum
18+
Sp1 <- sweep(Sp1,MARGIN=1,FUN="/",STATS=rowSums(Sp1))
19+
Sp2 <- sweep(Sp2,MARGIN=1,FUN="/",STATS=rowSums(Sp2))
20+
21+
#Step4: Merge Expression Tables
22+
geTable = merge(Sp1,Sp2, by='row.names', all=F)
23+
rownames(geTable) = geTable$Row.names
24+
geTable = geTable[,2:ncol(geTable)]
25+
26+
#Step5: Correlation
27+
#5a: Correlation
28+
Corr.Coeff.Table = cor(geTable,method=corr.method)
29+
30+
#5b: Shuffle data
31+
shuffled.cor.list = list()
32+
pb <- txtProgressBar(1, 100, style=3)
33+
34+
for (i in 1:nPermutations){
35+
shuffled = apply(geTable[,1:ncol(Sp1)],1,sample)
36+
shuffled2 = apply(geTable[,(ncol(Sp1)+1):ncol(geTable)],1,sample)
37+
shuffled = cbind(t(shuffled),t(shuffled2))
38+
shuffled.cor = cor(shuffled,method=corr.method)
39+
shuffled.cor.list[[i]] = shuffled.cor
40+
rm(list=c('shuffled','shuffled2','shuffled.cor'))
41+
if ((i %% 100) ==0){
42+
setTxtProgressBar(pb, (i*100)/nPermutations)
43+
}
44+
}
45+
46+
p.value.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
47+
rownames(p.value.table) = colnames(geTable)
48+
colnames(p.value.table) = colnames(geTable)
49+
50+
shuffled.mean.table = matrix(ncol=ncol(geTable), nrow = ncol(geTable))
51+
rownames(shuffled.mean.table) = colnames(geTable)
52+
colnames(shuffled.mean.table) = colnames(geTable)
53+
54+
a = combn(1:ncol(geTable),2)
55+
for (i in 1:ncol(a)){
56+
cor.scores = sapply(shuffled.cor.list,"[",a[1,i],a[2,i])
57+
shuffled.mean.table[a[1,i],a[2,i]] = mean(cor.scores)
58+
shuffled.mean.table[a[2,i],a[1,i]] = mean(cor.scores)
59+
p.value = mean(abs(cor.scores)>=abs(Corr.Coeff.Table[a[1,i],a[2,i]]))
60+
p.value.table[a[1,i],a[2,i]] = p.value
61+
p.value.table[a[2,i],a[1,i]] = p.value
62+
rm(list=c('cor.scores','p.value'))
63+
setTxtProgressBar(pb, (i*100)/ncol(a))
64+
}
65+
66+
#Step6: Return variables
67+
68+
list.to.return = list(Corr.Coeff.Table,shuffled.mean.table,p.value.table,DEgenes,rownames(geTable),length(DEgenes),nDESp1,nDESp2)
69+
names(list.to.return) = c('corr.coeff','shuffled_correlation_score_means','p.value','DEgenes_intersect','DEgenes_in_analysis','nDEgenes_in_analysis','nDEgenes_Sp1','nDEgenes_Sp2')
70+
return(list.to.return)
71+
72+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
##### determine differentially expressed genes using MAST #####
2+
# author: Maria Tosches
3+
# edited by: Juliska E Boer
4+
# date: 03 Nov 2020
5+
6+
DE_Gene_Union = function(SeuratObject,clusters,min.pct=0.4,thresh.use=log(2), data.info.col){
7+
DEgenes = list(character(),character(),character(),character())
8+
names(DEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5')
9+
combinations = t(combn(as.character(clusters),2))
10+
colnames(combinations) = c("ident.1","ident.2")
11+
nDEgenes = matrix(nrow=nrow(combinations),ncol=4)
12+
colnames(nDEgenes) = c('union_fdr<0.05','union_fdr<0.01','union_fdr<0.005','union_fdr<1e-5')
13+
rownames(nDEgenes) = paste('cluster',combinations[,1],'cluster',combinations[,2],sep='_')
14+
pb <- txtProgressBar(1, 100, style=3)
15+
for (i in 1:nrow(combinations)){
16+
a = combinations[i,1]
17+
b = combinations[i,2]
18+
suppressMessages(diff.genes <- FindMarkers.MAST(SeuratObject,a,b,min.pct,thresh.use,data.info.col))
19+
#matrix from FindMarkers.MAST only contains genes that have a fdr<0.05
20+
DEgenes[[paste("cluster",a,"cluster",b, sep="_")]] = diff.genes
21+
DEgenes[['union_fdr<0.05']] = union(DEgenes[['union_fdr<0.05']],rownames(diff.genes))
22+
DEgenes[['union_fdr<0.01']] = union(DEgenes[['union_fdr<0.01']],rownames(diff.genes[diff.genes$fdr<0.01,]))
23+
DEgenes[['union_fdr<0.005']] = union(DEgenes[['union_fdr<0.005']],rownames(diff.genes[diff.genes$fdr<0.005,]))
24+
DEgenes[['union_fdr<1e-5']] = union(DEgenes[['union_fdr<1e-5']],rownames(diff.genes[diff.genes$fdr<1e-5,]))
25+
nDEgenes[i,1] = nrow(diff.genes)
26+
nDEgenes[i,2] = nrow(diff.genes[diff.genes$fdr<0.01,])
27+
nDEgenes[i,3] = nrow(diff.genes[diff.genes$fdr<0.005,])
28+
nDEgenes[i,4] = nrow(diff.genes[diff.genes$fdr<1e-5,])
29+
rm(list = c('diff.genes','a','b'))
30+
setTxtProgressBar(pb, (i*100)/nrow(combinations))
31+
}
32+
DEgenes[['nDEgenes']] = nDEgenes
33+
DEgenes[['nDEgene_union']] = sapply(DEgenes[1:4],length)
34+
return(DEgenes)
35+
}
36+
37+
FindMarkers.MAST <- function(object, id1, id2, min.pct, thresh.use,data.info.col){
38+
cells.1 <- names(object@active.ident[object@active.ident == id1])
39+
cells.2 <- names(object@active.ident[object@active.ident == id2])
40+
cells.to.compare <- c(cells.1,cells.2)
41+
42+
raw.neur.counts <- as.matrix(object@assays$RNA@counts[,rownames(object[[]])])
43+
neur.alldata <- log(raw.neur.counts + 1)/log(2) # i.e. log2 base
44+
45+
genes.use = rownames(object@assays$RNA@data)
46+
thresh.min = 0
47+
data.temp1=round(apply(neur.alldata[genes.use, cells.1, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3)
48+
data.temp2=round(apply(neur.alldata[genes.use, cells.2, drop = F],1,function(x)return(length(x[x>thresh.min])/length(x))),3)
49+
data.alpha=cbind(data.temp1,data.temp2); colnames(data.alpha)=c("pct.1","pct.2")
50+
alpha.min=apply(data.alpha,1,max)
51+
names(alpha.min)=rownames(data.alpha)
52+
genes.use=names(which(alpha.min>min.pct))
53+
54+
neur.data <- neur.alldata[genes.use, cells.to.compare]
55+
56+
symbolid <- rownames(neur.data)
57+
primerid <- symbolid
58+
rownames(neur.data) <- primerid
59+
neur.cData <- as.data.frame(cbind(as.character(object@active.ident), object@meta.data[,data.info.col]))
60+
colnames(neur.cData) <- c("cluster",colnames(object@meta.data)[data.info.col])
61+
rownames(neur.cData) <- names(object@active.ident)
62+
neur.cData$cluster <- as.character(neur.cData$cluster)
63+
neur.cData <- neur.cData[cells.to.compare,]
64+
neur.cData$wellKey <- as.character(cells.to.compare)
65+
66+
neur.fData <- cbind(primerid,symbolid)
67+
neur.fData <- as.data.frame(neur.fData)
68+
neur.fData[,1] <- as.character(neur.fData[,1])
69+
70+
neur.to.compare <- FromMatrix(neur.data, neur.cData, neur.fData)
71+
72+
colData(neur.to.compare)$cngeneson <- scale(colSums(assay(neur.to.compare)>0))
73+
colData(neur.to.compare)$cluster <- as.factor(colData(neur.to.compare)$cluster)
74+
colData(neur.to.compare)$cluster <- relevel(colData(neur.to.compare)$cluster, as.character(id1))
75+
76+
zlmCond <- zlm(~ cluster + cngeneson, neur.to.compare)
77+
summaryCond <- summary(zlmCond, doLRT=paste0("cluster", id2))
78+
79+
summaryDt <- summaryCond$datatable
80+
summaryDt2 <- summaryDt
81+
set1 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "H"),c(1,4)]
82+
colnames(set1) <- c("primerid","Pr")
83+
set2 <- summaryDt2[(contrast==paste0("cluster", id2) & component == "logFC"), .(primerid, coef, ci.hi, ci.lo)]
84+
85+
fcHurdle <- merge(set1,set2, by="primerid")
86+
fcHurdle[,fdr:=p.adjust(Pr, "fdr")]
87+
88+
expMean=function(x) {
89+
return(log(mean(exp(x)-1)+1))
90+
}
91+
92+
data.avg.diff <- as.matrix(object@assays$RNA@data[rownames(object@assays$RNA@data) %in% genes.use, colnames(object@assays$RNA@data) %in% cells.to.compare])
93+
data1 <- data.avg.diff[,cells.1]
94+
data2 <- data.avg.diff[,cells.2]
95+
genes.use2<-rownames(data.avg.diff)
96+
avg_diff=unlist(lapply(genes.use2,function(x)(expMean(as.numeric(data1[x,]))-expMean(as.numeric(data2[x,])))))
97+
names(avg_diff)<-rownames(data.avg.diff)
98+
99+
fcHurdle2 <- as.data.frame(fcHurdle)
100+
rownames(fcHurdle2)<-fcHurdle2$primerid
101+
avg_diff <- avg_diff[rownames(fcHurdle2)]
102+
103+
fcHurdle3 <- cbind(fcHurdle2, avg_diff)
104+
105+
fcHurdleSig <- fcHurdle3[fcHurdle3$fdr<0.05 & abs(fcHurdle3$avg_diff)>thresh.use,]
106+
fcHurdleSig <- fcHurdleSig[complete.cases(fcHurdleSig),]
107+
fcHurdleSig <- cbind(fcHurdleSig[,c(2:3,6:7)],data.alpha[row.names(fcHurdleSig),])
108+
fcHurdleSig <- fcHurdleSig[order(abs(fcHurdleSig$avg_diff),decreasing=T),]
109+
110+
return(fcHurdleSig)
111+
112+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
##### create Seurat object after downstream analysis in SCANPY #####
2+
# author: Juliska E Boer
3+
# edited: L.L. van de Haar
4+
# date: 21 01 2021
5+
6+
#load packages
7+
setwd("/Volumes/HD_LLH/python/STAR_E18_wt/FINAL")
8+
library(Seurat)
9+
10+
#read raw- and batch corrected expression matrices and metadata
11+
raw_expr <- read.table("data/output/E18WT_rawexpr.csv", header=TRUE, sep=",", dec=".", row.names = 1)
12+
#batch_expr <- read.table("data/input/May2020_EmbryoHb_Seurat_batchexpr.csv", header=TRUE, sep=",", dec=".", row.names = 1)
13+
meta <- read.table("data/output/E18WT_metadata.csv", header=TRUE, sep=",", dec=".", row.names = 1)
14+
15+
raw_expr <- as.data.frame(t(raw_expr))
16+
#create Seurat object using raw counts and metadata
17+
final.E18WT <- CreateSeuratObject(counts = raw_expr, assay="RNA", meta.data=meta)
18+
#include batch corrected matrix as scaled data matrix
19+
#final.embryo@assays$RNA@scale.data <- as.matrix(batch_expr)
20+
#set Louvain clustering as identity
21+
Idents(final.E18WT) <- final.E18WT[["louvain"]]
22+
23+
#save Seurat object
24+
save(final.E18WT, file="data/output/E18WT_Scanpy_Seurat_obj.RData")
25+
26+
#save average expression per cluster for MAGMA analysis
27+
embryo_avg <- AverageExpression(final.embryo, assays = "RNA")[[1]]
28+
write.table(embryo_avg, file="data/output/MAGMA/Jun2020_GWAS_embryo.csv")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
##### create Seurat object after downstream analysis in SCANPY #####
2+
# author: Juliska E Boer
3+
# edited: L.L. van de Haar
4+
# date: 21 01 2021
5+
6+
#load packages
7+
setwd("/Volumes/HD_LLH/python/STAR_E18_wt/FINAL")
8+
library(Seurat)
9+
10+
#read raw- and batch corrected expression matrices and metadata
11+
raw_expr <- read.table("/Volumes/HD_LLH/Juliska/data/output/embryo_hb/E18_Brn3a-tLacZ_rawexpr.csv", header=TRUE, sep=",", dec=".", row.names = 1)
12+
#batch_expr <- read.table("data/input/May2020_EmbryoHb_Seurat_batchexpr.csv", header=TRUE, sep=",", dec=".", row.names = 1)
13+
meta <- read.table("/Volumes/HD_LLH/Juliska/data/output/embryo_hb/E18_Brn3a-tLacZ_metadata.csv", header=TRUE, sep=",", dec=".", row.names = 1)
14+
15+
raw_expr <- as.data.frame(t(raw_expr))
16+
#create Seurat object using raw counts and metadata
17+
final.E18Brn3a <- CreateSeuratObject(counts = raw_expr, assay="RNA", meta.data=meta)
18+
#include batch corrected matrix as scaled data matrix
19+
#final.embryo@assays$RNA@scale.data <- as.matrix(batch_expr)
20+
#set Louvain clustering as identity
21+
Idents(final.E18Brn3a) <- final.E18Brn3a[["louvain"]]
22+
23+
#save Seurat object
24+
save(final.E18Brn3a, file="data/output/final.E18Brn3a_Scanpy_Seurat_obj.RData")
25+
26+
#save average expression per cluster for MAGMA analysis
27+
embryo_avg <- AverageExpression(final.embryo, assays = "RNA")[[1]]
28+
write.table(embryo_avg, file="data/output/MAGMA/Jun2020_GWAS_embryo.csv")

0 commit comments

Comments
 (0)
Please sign in to comment.