-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetabolic_graphs.qmd
344 lines (279 loc) · 11.2 KB
/
metabolic_graphs.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
```{r include=FALSE,message=FALSE}
knitr::opts_chunk$set(echo = TRUE,
cache=TRUE,
warning = FALSE,
message = FALSE, out.width = "100%",cache.lazy=FALSE)
pdf="TRUE"
library(tidyverse)
library(igraph)
library(ComplexHeatmap)
library(viridis)
library(circlize)
library(plotly)
library(randomcoloR)
library(factoextra)
library(RColorBrewer)
library(kableExtra)
library(igraph)
library(GGally)
```
```{r}
gc()
load(file='metadag_work_space.RData')
gc()
```
# Metabolic Graphs
We present here some analysis examples of the metabolic graphs generated in GraphML format.
## Metabolic graphs for each organism
Read the individual metabolic graphs generated for Homo sapiens (KEGG id: hsa) in the directory(Individuals/hsa)
```{r}
experiment=
"0a845f74-826e-3b46-aed9-e7ecf74db262/"
path_exp=paste0("data/",experiment)
files_hsa=dir(paste0(path_exp,"Individuals/hsa"))
files_hsa
```
```{r echo=FALSE}
files_hsa=dir(paste0(path_exp,"Individuals/hsa"))
# "plot mDAG pdf format",
# "plot mDAG svg format",
type_file=c("m-DAG GraphML format",
"m-DAG pdf graphic",
"m-DAG svg graphic",
"csv file with the adjacency matrix of the m-DAG",
"pdf graphic with the biggest conected component of the m-DAG",
"svg graphic with the biggest conected component of the m-DAG ",
"csv file with the node (MBBs) labels of the m-DAG",
"csv file with all connected components of the m-DAG",
"csv file with the adjacency matrix of the reaction graph",
"csv file with the node (reactions) labels of the reaction graph",
"reaction graph GraphML format",
"reaction graph pdf graphic",
"reaction graph svg graphic",
"text summary file with the number of MBBs, reactions, etc. in the previous graphs"
)
knitr::kable(data.frame(files_Individual_hsa=files_hsa, Description=type_file))
```
## Pan & core metabolic graphs
Pan and core metabolic graphs for every group were generated. For instance, one can read the pan and core metabolic graphs generated for the group Algae in the directory (Groups/Algae).
```{r}
files_Algae=dir(paste0(path_exp,"Groups/Algae"))
files_Algae
```
The global core reaction graph, which is the core of all the organisms' reaction graphs in this Eukaryotes test, is empty.
```{r graph_core_ALL}
graph_core_RC=read.graph(
paste0(path_exp,
"Global/core/core_RC.graphml"),
format = "graphml")
summary(graph_core_RC)
```
The global core reaction graph has `r igraph::vcount(graph_core_RC)` vertex and `r igraph::ecount(graph_core_RC)` edges. It is an empty graph.
The core reaction graph for the Algae group is:
```{r fig.cap="Algae core reaction graph"}
knitr::include_graphics(
paste0(path_exp,"Groups/MSA_Cluster_3/core/MSA_Cluster_3_core_RC.pdf"))
```
The global core m-DAG, i.e., the core of all organisms in this example is empty.
```{r}
graph_core_mDAG=read.graph(
paste0(path_exp,"Global/core/core_mDAG.graphml"),
format = "graphml")
summary(graph_core_mDAG)
```
The global core m-DAG has `r igraph::vcount(graph_core_mDAG)` vertex and `r igraph::ecount(graph_core_mDAG)` edges. It is an empty graph.
The core metabolic DAG for the Algae group is:
```{r fig.cap="Core m-DAG for Algae"}
knitr::include_graphics(paste0(path_exp, "Groups/Algae/core/Algae_core_mDAG.pdf"))
```
The global pan reaction graph for the Animals Kingdom is:
```{r}
graph_pan_RC=read.graph(
paste0(path_exp,
"TaxonomyLevels/Kingdom/Animals/pan/Animals_pan_RC.graphml"),
format = "graphml")
summary(graph_pan_RC)
```
This pan reaction graph has `r igraph::vcount(graph_pan_RC)` nodes and `r igraph::ecount(graph_pan_RC)` edges.
## Graph's topology
From the GraphML files, one can extract topological information. Some examples are as follows.
The diagram below illustrates the distribution of node degrees for an m-DAG.
```{r}
graph_mDAG=read.graph(
paste0(path_exp,
"Individuals/hsa/hsa_mDAG.graphml"),
format= "graphml")
summary(graph_mDAG)
barplot(table(igraph::degree(graph_mDAG,mode="all")),
ylim=c(0,350),col="blue",
main="Frequency of Node Degrees",
ylab="Frequency",xlab="Degree")
```
The connected components of every graph as well as the size of every connected component can be obtained as:
```{r}
compo=components(graph_mDAG,mode = "weak")
str(compo)
compo$csize
k=which.max(compo$csize==max(compo$csize))
k
table(compo$membership)
vertex=which(compo$membership==k)
length(vertex)
Big_Component=induced_subgraph(graph_mDAG, vids=vertex)
igraph::vcount(Big_Component)
igraph::ecount(Big_Component)
```
And the plot of the bigger component of the m-DAG in Homo sapiens is:
```{r fig.align='center',title="Bigger component of hsa mDAG"}
knitr::include_graphics(paste0(path_exp,
"Individuals/hsa/hsa_mDAG_biggerDAG.pdf"))
```
```{r}
#path_exp="data/result_bb261b6e-95c6-3e39-b82b-b68eea80e30b/data/"
list_names=dir(paste0(path_exp,"Individuals/"))
list_names= list_names[-1] # filter 0000_RefPw
length(list_names)
graphs_list=paste0(path_exp,"Individuals/", list_names,"/",list_names, "_MDAG.graphml")
```
```{r}
knitr::include_graphics(
paste0(path_exp,"Individuals/cang/cang_RC.pdf"))
```
## Graph statistics
The number of connected component in each generated m-DAG with their frequency in the entire set of m-DAGs, can be obtained as follows:
```{r ,warning=FALSE}
read_mDAG=function(x) {DAG=read.graph(file=x,
format="graphml")
return(DAG)}
mDAG_components=function(x) {
sort(components(x,mode = "weak")$csize,
decreasing=TRUE)
}
compo_list=lapply(graphs_list,
FUN=function(x) {
gg=read_mDAG(x)
aux=list(
mDAG_components=mDAG_components(gg),
degree_count=igraph::degree(gg,mode="total"))
return(aux)}
)
names(compo_list)=list_names
n=max(sapply(compo_list,FUN=function(x) {length(x[[1]])}))
n
size_compo_list=lapply(compo_list,FUN=function(x) {
return(c(x[[1]],rep(NA,n-length(x[[1]]))))})
aux=do.call(bind_cols,size_compo_list)
weak_components_size=pivot_longer(aux,aaf:zvi,names_to="Organism",
values_to="csize") %>%
arrange(Organism,-csize)
weak_components_size$index=rep(1:n,times=dim(aux)[2])
weak_components_size=weak_components_size %>%
left_join(meta_taxo,by="Organism") %>% filter(!is.na(Kingdom),!is.na(csize))
weak_components_size_raw=weak_components_size
```
```{r ,warning=FALSE,cache=FALSE}
Organism=names(compo_list)
size_MBB=function(org){
#org="hsa"
x=Results %>% filter(Organism==org) %>% select(!contains("_rev"))
x=as.character(x[1,6:dim(x)[2]])
x=x[(!is.na(x))]
x=x[x!="NA"]
tt=data.frame(sort(table(x),decreasing=TRUE),org)
names(tt)=c("MBB","size","Organism")
return(tt)
}
size_list_raw= lapply(Organism,FUN=function(x) size_MBB(x))
names(size_list_raw)=Organism
```
```{r ,warning=FALSE,cache=FALSE}
size_MBB_df=do.call(rbind,size_list_raw)
max(size_MBB_df$size)
size_MBB_df=size_MBB_df %>% left_join(meta_taxo%>% select(Organism,Kingdom),by="Organism") %>% filter(!is.na(Kingdom))
```
We can visualize the sizes of the weak components for each m-DAG, using colors to represent the different Kingdoms, also we scale the results by a log-log plot:
```{r}
COLOR_KINGDOM=c("red","yellow","green","black")
colors_kingdom=weak_components_size%>% select(Organism,Kingdom) %>% distinct()
names(COLOR_KINGDOM)=sort(unique(colors_kingdom$Kingdom))
p1<- ggplot(data=weak_components_size) +
geom_line(mapping=aes(x=index,
y=csize,group = Organism,
color=Kingdom),
na.rm=TRUE) +
scale_x_continuous(trans="identity") +
scale_y_continuous(trans="identity") +
ylim(0,640)+
ggtitle("Plot of weak components size decreasing order.")+
ylab("Weak components size") + xlab("Order")+
scale_color_manual(values =COLOR_KINGDOM[colors_kingdom$Kingdom])
p2<- ggplot(data=weak_components_size) +
geom_line(mapping=aes(x=index,
y=csize,group = Organism,
color=Kingdom),
na.rm=TRUE) +
scale_y_continuous(trans='log10') +
scale_x_continuous(trans='log10') +
scale_color_manual(values =COLOR_KINGDOM[colors_kingdom$Kingdom])+
ggtitle("Plot log10-log10 of size weak components decreasing order.") +
ylab("Log10 weak component size") + xlab("Log10 order")
p1
p2
```
A table with the frequencies of the weak connected components sizes, displayed by Kingdom, can be obtained as follows:
```{r}
aux=table(weak_components_size$csize,weak_components_size$Kingdom)
table_wcc_size=tibble(Order=1:dim(aux)[1],
Wcc_size=as.integer(unlist(dimnames(aux)[1])),
Animals=aux[,1],
Fungi=aux[,2],
Plants=aux[,3],
Protists=aux[,4])
knitr::kable(table_wcc_size,
caption = "Weak Connected Componet Size")%>% kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
scroll_box(height = "300px", width = "100%")
```
A table with the frequencies of the MBBs sizes displayed by Kingdom can be obtained as follows:
```{r}
size_MBB_df_table= size_MBB_df %>% group_by(Kingdom,size) %>% summarise(n=n())
table_MBB_size = size_MBB_df_table %>% pivot_wider(names_from = Kingdom, values_from = n, values_fill = list(n = 0)) %>% arrange(size)
knitr::kable(table_MBB_size, caption = "MBB Size: Frequency by Kingdom")%>% kable_styling(bootstrap_options = "striped", full_width = FALSE) %>% scroll_box(height = "300px", width = "100%")
```
```{r include=FALSE}
write_csv(table_MBB_size,"table_MBB_size.csv")
```
```{r pasar_load, include=FALSE}
save.image(file='metadag_work_space.RData')
```
### More statistics
h
```{r}
weak_components_size$Kingdom <- factor(weak_components_size$Kingdom)
#weak_components_size
g <- ggplot(weak_components_size) +
xlab("") + # Eliminar el título del eje X
ylab("Size of connected component") + # Etiqueta del eje Y
geom_jitter(aes(x = Kingdom, y = csize, color = Kingdom),
size = 1) + # Colorear puntos según 'Kingdom' y reducir el tamaño
scale_y_continuous(breaks = seq(0, 640, by = 100)) + # Escala del eje Y con saltos de 20
theme_minimal() + # Tema minimalista con fondo blanco
ggtitle("") # Título del gráfico
g
```
```{r}
weak_components_size_max = weak_components_size %>%
group_by(Organism) %>%
summarise(csize = max(csize),Kingdom=first(Kingdom))
# Crear el gráfico de violín con puntos jitter
g <- ggplot(weak_components_size_max, aes(x = Kingdom, y = csize)) +
geom_violin(aes(fill = Kingdom), trim = FALSE, alpha = 0.5) + # Gráfico de violín con relleno por 'Kingdom' y sin recortar
geom_jitter(aes(color = Kingdom), size = 1, width = 0.2) + # Puntos con jitter para evitar solapamientos
xlab("") + # Eliminar el título del eje X
ylab("Size of biggest connected component") +
scale_y_continuous(breaks = seq(0, 640, by = 100)) +
theme_minimal() + # theme background white
ggtitle("") #
#Size of the largest weakly connected component by Kingdom \n (Violin Plot with Jittered Points)
# Mostrar el gráfico
g
```