-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdawidSkene.R
78 lines (61 loc) · 1.66 KB
/
dawidSkene.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
setwd("~/workspace/repo/dawidSkene/")
require("dplyr")
require("ggplot2")
source("dawidSkene.lib.R")
f <- dataGen()
#f <- f[1:5,1:2,1:2]
I = dim(f)[1]
J = sum(!is.na(unique(as.vector(f)))) #sum(!is.na(unique(unlist(apply(X = f, MARGIN = 2, unique)), na.rm = T)))
K = dim(f)[2]
# Tested N
N <- array(NA, dim = c(I, J, K)) #
for (i in 1:I){
for (j in 1:J){
for (k in 1:K){
N[i,j,k] <- sum(f[i,k]== j, na.rm = T)
}
}
}
### Step i: Initial Estimates
Tinit <- matrix(0, nrow = dim(f)[1], ncol = J)
for (i in 1:I){
for (j in 1:J){
Tinit[i,j] <- sum(N[i,j,])/sum(N[i,,])
}
}
T_ij <- Tinit
for (iter in 1:40){
### Step ii: Estimate PI_ij and p_j #checked
PI <- array(data = NA, dim = c(J,J,K))
for (k in 1:K){
for (j in 1:J){
for (l in 1:J){
PI[j,l,k] <- T_ij[,j] %*% N[,l,k] / sum(T_ij[,j] %*% N[,,k])
}
}
}
P <- array(data = NA, dim = J) #Check
P <- colSums(T_ij)/I #Check
### Step iii: estimating new true labels
TijNew <- array(data = 1, dim = c(I,J))
for (i in 1:I){
for (k in 1:K){
for (l in 1:J){
TijNew[i,] <- TijNew[i,] * PI[,l,k] ^ N[i,l,k]
}
}
TijNew[i,] <- (TijNew[i,] * P)/sum(TijNew[i,] * P)
}
# TijNew <- array(data = 1, dim = c(I,J))
# for (i in 1:I){
# for (k in 1:K){
# for (l in 1:J){
# TijNew[i,] <- TijNew[i,] * PI[,l,k] ^ N[i,l,k] * P
# }
# }
# TijNew[i,] <- TijNew[i,]/sum(TijNew[i,])
# }
T_ij <- TijNew
}
output <- list("f" = f, "T"= T_ij, "Pi"= PI, "P" = P, "N" = N, "class" = as.integer(T_ij[,2] > .5))
save(output, file = "~/Dropbox/population_genomics_project_collaboration/pairwise_comparison/parasites.Rdat")