forked from Naima16/dbd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGLMM_CLASSperPHYLUM.R
141 lines (112 loc) · 5.77 KB
/
GLMM_CLASSperPHYLUM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
##### GLMM Class:Phylum ratio
##### 28 oct 2018, NM
if(!require(lme4)){install.packages("lme4")}
require(lme4)
if(!require(readxl)){install.packages("readxl")}
require(readxl)
if(!require(lattice)){install.packages("lattice")}
require(lattice)
if(!require(jtools)){install.packages("jtools")}
require(jtools)
overdisp_fun <- function(model) {
## number of variance parameters in
## an n-by-n variance-covariance matrix
vpars <- function(m) {
nrow(m)*(nrow(m)+1)/2
}
model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
rdf <- nrow(model.frame(model))-model.df
rp <- residuals(model,type="pearson")
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq/rdf
pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
### table des PI
study_lab=read_excel("~/lab_emp.xlsx", col_names = TRUE)
colnames(study_lab)=c("study","PI")
##### table des empo pour chaque sample
tab_emp=read.csv("~/sample_emplevel.tsv",header=T)
tab_emp=tab_emp[,-1]
colnames(tab_emp)
colnames(tab_emp)=c("sample","empo_0", "empo_1" ,"empo_2","empo_3")
tab_emp=as.data.frame(tab_emp)
tab_emp$empo_1=as.factor(tab_emp$empo_1)
tab_emp$empo_2=as.factor(tab_emp$empo_2)
tab_emp$empo_3=as.factor(tab_emp$empo_3)
### table des nb_phyla, phyla type et le nombre correspondant en classe pour chaque sample
tab_phylatype=read.table("~/phyla_tab_2k.tsv",header=T,sep=",")
tab_phylatype=as.data.frame(tab_phylatype)
tab_phylatype$phylum_type=as.factor(tab_phylatype$phylum_type)
tab_phylatype_emp=merge(tab_phylatype[,-1],tab_emp[,c("sample","empo_3")],by="sample")
### ajouter le lab
for (i in 1:nrow(tab_phylatype_emp) )
tab_phylatype_emp[i,'study']=strsplit(as.character(tab_phylatype_emp[i,]$sample),"[.]")[[1]][1]
tab.final=merge(tab_phylatype_emp,study_lab,by="study")
tab.final$PI=as.factor(tab.final$PI)
tab.final$phylum_type=as.factor(tab.final$phylum_type)
tab.final$empo_3=as.factor(tab.final$empo_3)
######################
save(tab.final,file="tab_final_phylum.RData")
##########
hist(tab.final$nb_class)
hist(tab.final$nb_phylum)
#### z transform the predictor
pvar='nb_phylum'
datsc <- tab.final
datsc[pvar] <- lapply(tab.final[pvar],scale)
##full model
glmer.phylum = glmer(nb_class~nb_phylum+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3/PI)+(nb_phylum|sample),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=100000)))
save(glmer.phylum,file="glmer_phylum.RData")
summary(glmer.phylum)
##### supprimer la singularité : sample variation is 0
glmer.phylum.sample.1 = glmer(nb_class~nb_phylum+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3/PI),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=100000)))
save(glmer.phylum.sample.1,file="glmer_phylum_sample_sept.RData")
summary(glmer.phylum.sample.1)
##### supprimer la singularité: il y a une correlation de 1 entre slope et intercept de biome, on garde biome juste sur slope
glmer.phylum.biomSlop = glmer(nb_class~nb_phylum+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI)+(nb_phylum-1|empo_3),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa"))
summary(glmer.phylum.biomSlop)
save(glmer.phylum.biomSlop,file="glmer_phylum_biomSlop.RData")
###tester la significance des randoms
glmer.phylum.biomSlop1 = glmer(nb_class~nb_phylum+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa"))
glmer.phylum.biomSlop2 = glmer(nb_class~nb_phylum+(nb_phylum|phylum_type/empo_3),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa"))
glmer.phylum.biomSlop3 = lm(nb_class~nb_phylum,datsc)
anova(glmer.phylum.sample.1,glmer.phylum.biomSlop,glmer.phylum.biomSlop1,glmer.phylum.biomSlop2,glmer.phylum.biomSlop3)
##### glmer.phylum.biomSlop est le plus significatif
#### test the fixed effect significance
g1=update(glmer.phylum.biomSlop,.~1+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI)+(nb_phylum-1|empo_3))
anova(glmer.phylum.biomSlop,g1) ## glmer.phylum.biomSlop is significant
##### overdispersion test and diagnostic plots
overdisp_fun(glmer.phylum.biomSlop)
plot(glmer.phylum.biomSlop)
qqnorm(residuals(glmer.phylum.biomSlop))
######
#### DBD variation across environments
#### biome as fixed effect
#### model without the main effects (of empo3 and nb_phylum, i.e. separate intercept
#### for every biome)
##########
glmer.phylum.biom.2 = glmer(nb_class~nb_phylum:empo_3+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=100000)))
summary(glmer.phylum.biom.2)
save(glmer.phylum.biom.2,file="glmer_phylum_biomFixed2.RData")
#####significance de l'interaction
g_1 = glmer(nb_class~1+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=100000)))
anova(glmer.phylum.biom.2,g_1)
##### interaction + the main effects of nb_phylum and empo_3
glmer.phylum.biom.21 = glmer(nb_class~nb_phylum*empo_3+(nb_phylum|phylum_type/empo_3)+(nb_phylum|empo_3:PI),datsc,family=poisson(link=log),
control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=100000)))
save(glmer.phylum.biom.21,file="glmerphylumbiom21.RData") ##only 4 aic difference
anova(glmer.phylum.biom.2,glmer.phylum.biom.21) ## dAIC = 4, weak dAIC means the two models are equivalent, we keep the parcimonious model
##
#### diagnostic plots and overdispersion
interact_plot(glmer.phylum.biom.2,pred=nb_phylum,modx=empo_3)
overdisp_fun(glmer.phylum.biom.2)
plot(glmer.phylum.biom.2)
qqnorm(residuals(glmer.phylum.biom.2))