-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRank_SE_associated_genes_by_expression_levels.R
75 lines (55 loc) · 2.75 KB
/
Rank_SE_associated_genes_by_expression_levels.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# NOTE(review): hard-coded, machine-specific working directory; change it to
# your local data directory before running the script.
setwd("F:/github/Kanazawa/RNA-seq")
library(ggplot2)
library(tidyverse)
library(pals)
library(ggrepel)
#' Read a tab-delimited peak annotation table and standardise two column names.
#'
#' @param data Path (or connection) handed to `read.csv()`; the input is parsed
#'   as tab-separated text with a header row.
#' @return A data.frame whose 1st column is named "PeakID" and whose 12th
#'   column is named "ENSEMBL". All other columns keep the names assigned by
#'   `read.csv()`.
get_anno <- function(data) {
  # Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
  # reassigned, which would silently change how the file is parsed.
  df <- read.csv(data, header = TRUE, stringsAsFactors = FALSE, sep = "\t")

  # The renaming below is positional, so fail early with a readable message
  # when the table is too narrow.
  if (ncol(df) < 12) {
    stop(
      "Annotation table has ", ncol(df),
      " column(s); at least 12 are required.",
      call. = FALSE
    )
  }

  # NOTE(review): assumes column 12 holds the gene identifier that is later
  # used as the join key against the expression table - confirm against the
  # annotation output.
  colnames(df)[c(1, 12)] <- c("PeakID", "ENSEMBL")
  df
}
#' Shift every element of a numeric vector up by one.
#'
#' @param vec Numeric vector (or anything supporting `+ 1`).
#' @return `vec + 1`, same length as `vec`.
Add_1 <- function(vec) {
  vec + 1
}
#' Attach expression values to two annotation tables and keep one row per gene.
#'
#' @param anno1,anno2 Data frames containing an "ENSEMBL" column. They are
#'   stacked with `rbind()` after joining, so both must have the same columns.
#' @param TPM Data frame with an "ENSEMBL" column plus expression columns.
#' @param colname_of_WT_scale_TPM Name of the column (present after the join)
#'   whose non-finite entries mark rows to discard.
#' @return A data frame with one row per distinct ENSEMBL value (first
#'   occurrence wins, rows of `anno1` before rows of `anno2`), restricted to
#'   rows whose value in `colname_of_WT_scale_TPM` is finite.
Make_plot_df <- function(anno1, anno2, TPM, colname_of_WT_scale_TPM) {
  # Left-join keeps every annotated gene; genes absent from TPM get NA.
  attach_expression <- function(anno) {
    left_join(anno, TPM, by = "ENSEMBL") %>%
      distinct(ENSEMBL, .keep_all = TRUE)
  }

  anno_TPM <- rbind(attach_expression(anno1), attach_expression(anno2)) %>%
    distinct(ENSEMBL, .keep_all = TRUE)

  # Fail with a clear message instead of an obscure is.finite() error when the
  # requested column does not exist after the join.
  if (!colname_of_WT_scale_TPM %in% colnames(anno_TPM)) {
    stop(
      "Column '", colname_of_WT_scale_TPM,
      "' not found after joining annotation and TPM tables.",
      call. = FALSE
    )
  }

  # Drops NA (gene missing from TPM), NaN and +/-Inf entries.
  has_value <- is.finite(anno_TPM[[colname_of_WT_scale_TPM]])
  anno_TPM[has_value, ]
}
#' Rank genes by log2 fold change between two conditions and plot the ranking.
#'
#' The fold change is defined as condition1 / condition2.
#'
#' @param df Data frame with one row per gene. Must contain the two condition
#'   columns; gene labels are taken from a `Gene.Name` column.
#' @param condition1,condition2 Columns used as numerator and denominator of
#'   the fold change.
#' @param selected_gene Character vector of gene names to label in the plot.
#' @param show_label If TRUE, labels for `selected_gene` are drawn with
#'   `geom_text_repel()`.
#' @return `df` ordered by decreasing log2 fold change, with the added columns
#'   `log2_FC`, `rank` (1 = largest fold change) and `label` (gene name for
#'   selected genes, "" otherwise). The plot is drawn as a side effect.
get_plot <- function(df, condition1, condition2, selected_gene, show_label) {
  cond_cols <- c(condition1, condition2)
  if (is.character(cond_cols)) {
    absent <- setdiff(cond_cols, colnames(df))
    if (length(absent) > 0) {
      stop(
        "Column(s) not found in df: ", paste(absent, collapse = ", "),
        call. = FALSE
      )
    }
  }

  df$log2_FC <- log2(df[[condition1]] / df[[condition2]])
  df <- df[order(-df$log2_FC), ]

  # seq_len() avoids the "wrong sign in 'by'" error seq(1, 0, 1) raises on an
  # empty table; as.numeric() keeps the column type produced previously.
  df$rank <- as.numeric(seq_len(nrow(df)))

  is_selected <- df$Gene.Name %in% selected_gene
  df$label <- rep("", nrow(df))
  df$label[is_selected] <- df$Gene.Name[is_selected]

  g <- ggplot(df, aes(x = rank, y = log2_FC, color = -rank, label = label)) +
    theme_classic(base_size = 20) +
    labs(x = "SE associated genes", y = "Fold change (log2)") +
    # "none" replaces the deprecated `guides(color = FALSE)` form.
    guides(color = "none") +
    scale_color_gradientn(
      colours = colorRampPalette(c("violetred1", "white", "royalblue"))(100)
    ) +
    geom_point(size = 2) +
    geom_hline(yintercept = 0) +
    # Axis breaks are fixed at ranks 1, 100, 200 and 300.
    scale_x_continuous(breaks = c(1, seq(100, 300, 100))) +
    theme(
      axis.title = element_text(size = 25),
      axis.text = element_text(size = 20)
    )

  if (show_label) {
    g <- g + geom_text_repel(
      na.rm = TRUE, size = 4.0, nudge_x = 5, nudge_y = 1,
      segment.alpha = 0.7, color = "black"
    )
  }

  plot(g)
  df
}
# ---- Load expression values ----
TPM <- read.csv("Path to your 'Scaled_TPM.csv' generated by 'Calculate_scaled_TPM_of_RNA-seq.R'", stringsAsFactors = FALSE)
colnames(TPM) <- c("ENSEMBL", "WT", "KO")

# Add 1 to every expression column (presumably a pseudocount so the WT/KO ratio
# stays finite for zero-expression genes). lapply() works column by column, so
# unlike apply() it does not collapse to a plain vector for a one-row table.
expr_cols <- setdiff(colnames(TPM), "ENSEMBL")
TPM[expr_cols] <- lapply(TPM[expr_cols], Add_1)
TPM$ENSEMBL <- as.character(TPM$ENSEMBL)

# Remove low expressing genes. The threshold is applied after the +1 shift,
# i.e. a gene is kept when its unshifted value exceeds 1 in WT or KO.
TPM <- subset(TPM, WT > 2 | KO > 2)

# ---- Load SE annotations and build the plotting table ----
WT_anno <- get_anno("Path to your 'WT_SE_annotated.txt' generated by 'findPeaks_annotatePeaks.sh'")
KO_anno <- get_anno("Path to your 'KO_SE_annotated.txt' generated by 'findPeaks_annotatePeaks.sh'")
WT_KO_TPM <- Make_plot_df(
  anno1 = WT_anno, anno2 = KO_anno, TPM = TPM,
  colname_of_WT_scale_TPM = "WT"
)

# ---- Rank genes by WT/KO fold change and draw the plot ----
df <- get_plot(
  df = WT_KO_TPM, condition1 = "WT", condition2 = "KO",
  selected_gene = c("Rin1", "Spp1", "Cdt1", "Anxa2", "Tpm1", "Gm48449"),
  show_label = TRUE
)