forked from Naima16/dbd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdbd.perm2.R
155 lines (133 loc) · 5.88 KB
/
dbd.perm2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
library(ggplot2)
###spiders. test example
spiders = read.table("Spiders_28x12_spe.txt")
genera = c("Alop", "Alop", "Alop", "Arct", "Arct", "Aulo", "Pard", "Pard", "Pard", "Pard", "Troc", "Zora")
mat = as.matrix(spiders)
genus=genera
### emp biome table
EMP_data=read.csv("EMP_table.from_biom_w_taxonomy.txt",header=T,sep="\t")
dim(EMP_data)
emp1=EMP_data
emp1=emp1[,-1]
genus=array()
for (i in 1:dim(emp1)[1])
genus[i]=unlist(strsplit(unlist(strsplit(toString(emp1[i,"taxonomy"]),";"))[6],"__"))[2]
nb_genus=length(genus[!is.na(genus)])
emp2=cbind(emp1,genus)
#### drop unnamed genera
emp2=emp2[ !is.na(emp2$genus),]
colnames(emp2)=c(colnames(emp1),"genus")
ind_taxonomy=which(colnames(emp2)=="taxonomy")
ind_genus=which(colnames(emp2)=="genus")
mat = t(emp2[,-c(ind_taxonomy,ind_genus)])
genus=emp2$genus
### C code import to improve run time in the permutation loop
dyn.load("sample2.so")
nperm=99
out=dbd.perm2(mat,g=genus,perm.method=1,nperm=nperm,nn.print=0,clock=T)
###
dbd.perm2 <- function(mat, g, perm.method=1, nperm=999, nn.print=0,clock=TRUE)
# Permutation test for the "Diversity begets diversity" (dbd) paper.
# Vers. 2: general permutation of values by rows, by columns, or in the whole matrix "mat". Statistic to be tested: rich.genera ~ species.richness/N.genera
#
# Arguments –
# mat = data matrix (data.frame or matrix), sites in rows x species (or OTUs) in columns.
# g = vector of {genera, families, orders, classes, phyla} names in columns of mat
# perm.method = {1,2,3} : Different permutation methods.
# nperm = number of permutations.
# nn.print : print 'nn' lines of the output matrix. Default: nn=0.
# clock : If \code{clock=TRUE}, the computation time is printed. Useful when nperm large.
#
# Details –
# Matrix "mat2" (sites x genera) is produced first for the unpermuted data (mat2 = mat %*% A.perm), then after each permutation using one of the three methods below.
#
# Statistic to be tested: richness.genera ~ (how many species per genus in each site).
#
# A permutation test is a valid way of testing the relationship between x and y/x. The permutations must be done on y, or on the separate elements y and x, before the ratio variable y/x is recomputed after each permutation. Reference: Jackson & Somers (1991).
#
# Three permutation methods are implemented –
# Before computing mat2,
# Method 1. (Method A in the paper) Permute data in individual rows of mat1. (Model 2 in the paper)
# Method 2. (Method B in the paper) Permute data in individual columns of mat1. (Model 1 in the paper)
#
# ### Method 1 has low power. ###
# Value –
# The output list comprises the following elements:
# • Richness : data frame with 2 columns, rich.gen and Nsp.by.Ngen, for data.
# • Stat.out contains 2 columns: the correlation and the associated t-statistic.
# • p.val is the p-value computed from the test of the correlation statistic.
# • Richness.perm : data frame with 2 columns, rich.gen and Nsp.by.Ngen, permuted data.
#
# References –
# Jackson, D. A. & K. M. Somers. 1991. The spectre of ‘spurious’ correlations.
# Oecologia 86: 147-151.
#
# P. Legendre, Naima Madi september 2019
{
epsilon <- sqrt(.Machine$double.eps)
aa <- system.time({
mat = as.matrix(mat)
n <- nrow(mat)
p <- ncol(mat)
# Create matrix mat1 (sites x species), p/a data
mat1=.Call("presence_absence",mat)
rich.sp=.Call("richesse",mat1)
# Create matrix A of dummy variables (species x genera)
g = as.factor(g)
ng = length(g)
A = model.matrix(~g-1)
A = A[1:ng,]
colnames(A) = levels(g)
# Create matrix mat2 (sites x genera), p/a data
mat2=.Call("produit",mat,A)
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
Nsp.by.Ngen = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen,rich.sp, Nsp.by.Ngen))
Richness=Richness[Richness$rich.gen !=0,]
Richness.emp=Richness
Stat.out = matrix(NA,nperm+1,2)
colnames(Stat.out) = c("cor.sg", "cor.t")
rownames(Stat.out) = paste("perm", 0:nperm, sep=".")
# Compute statistics for unpermuted data and write them to Stat.out
Stat.out[1,1] <- cor.sg <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen)
Stat.out[1,2] <- cor.t <- cor.sg * sqrt(n-2)/sqrt(1-cor.sg^2)
if(cor.sg > 1.0-sqrt(epsilon)) cat("cor = 1.0; a test is useless.\n")
#
# Permutations begin
if((perm.method<1) | (perm.method>3)) stop ("Method not implemented")
for(i in 1:(nperm)) {
mat1.perm = matrix(0,n,p)
## this is model A in the paper
if(perm.method==1) { # Permute p/a data in individual rows of mat1 (sites x species)
mat1.perm=.Call("sampleC_row_real",mat1)
}
##this is model B in the paper
if(perm.method==2) { # Permute data in individual columns of mat1 (sites x species)
mat1.perm = .Call("sampleC_col_real",mat1)
}
# Create matrix "mat2" of sites x genera
mat2=.Call("produit",mat1.perm,A)
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
length(rich.gen[rich.gen==0])
rich.sp=.Call("richesse",mat1.perm)
Nsp.by.Ngen.perm = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen, Nsp.by.Ngen.perm))
Richness=Richness[Richness$rich.gen !=0,]
# Compute statistics
Stat.out[i+1,1] <- cor.sg.perm <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen.perm)
Stat.out[i+1,2] <- cor.t.perm <- cor.sg.perm * sqrt(n-2)/sqrt(1-cor.sg.perm^2)
}
#
# Print a few lines (nn lines) of the output matrix
if(nn.print>0) print(Stat.out[1:nn.print,])
})
aa[3] <- sprintf("%2f", aa[3])
if (clock) cat("Time for computation =", aa[3], " sec\n")
# Compute p-value
p.val = length(which(Stat.out[,2] >= Stat.out[1,2]))/(nperm+1)
#
###on devrait sortir le Richness des vraies data celui la est permuté
list(Richness=Richness.emp,Stat.out=Stat.out, p.val=p.val,Richness.perm=Richness)
}