---
title: "Introduction to `nimble`"
format:
  html:
    toc: true
    toc-location: right
author:
  - Baptiste Alglave
  - Julien Chiquet
  - Pierre Gloaguen
  - Emily Walker
editor_options:
  chunk_output_type: console
---

# Required packages for this tutorial {.unnumbered}
```{r}
#| label: packages_nimble_section
#| warning: false
#| message: false
#| cache: false
library(compareMCMCs)
library(ggmcmc)
library(mvtnorm)
library(nimble)
library(nimbleHMC)
library(tidyverse)
library(parallel)
library(future)
library(furrr)
```
# Short presentation of `nimble`
First, a very good reference: https://r-nimble.org/html_manual/cha-welcome-nimble.html
The `nimble` package is:
- A system for writing statistical models flexibly (based on `bugs` and `jags`).
- A library of inference algorithms (MCMC, HMC, Laplace approximation).
- A compiler that generates and compiles C++ for your models and algorithms, without you having to write or even see any C++.
There is a `nimble` language with the basics for building statistical models (e.g. the key probability distributions and some key transformation functions).
However, this remains limited: for full flexibility, one needs to write one's own functions, which **can be very challenging**.
# Fitting a model in a Bayesian framework with `nimble`
## Toy model
We observe a sample $Y_1,\dots, Y_n$ of i.i.d. random variables having a negative binomial distribution with parameters $p \in [0, 1]$ and $\theta \in \mathbb{R}_+^*$.
Formally, for $i \in \lbrace 1,\dots, n\rbrace$, and $k \in \mathbb{N}$, we have that:
$$
\mathbb{P}\left(Y_i = k \vert p, \theta\right) = \frac{\Gamma(k + \theta)}{k!\Gamma(\theta)}p^\theta(1 - p)^k\,.
$$
Let's simulate data from this model with $n = 100, p = 0.4$ and $\theta = 12$:
```{r}
#| label: example1_data
set.seed(123) # For reproducibility
data_ex1 <- rnbinom(n = 100, prob = 0.4, size = 12)
```
Our goal is to estimate $p$ and $\theta$ from these observations, within a Bayesian framework.
For this tutorial, we assume the following priors:
\begin{align*}
\theta &\sim \mathcal{E}(0.1)\,,\\
p &\sim \mathcal{U}\left[0, 1\right]\,.\\
\end{align*}
## Defining a negative binomial model in `nimble`
Basically, as in `BUGS` or `JAGS`, the user's role is to write down how the data are simulated and to give the prior distributions of the unknowns. This is done within the `nimbleCode` function.
This code will typically rely on built-in distributions, which are listed in the package documentation.
All random variables must be assigned using the `~` symbol, while deterministic quantities are assigned using `<-` or `=`, as in `R`.
Overall, the syntax is quite similar to `R`.
```{r}
#| label: code_neg_bin
code_neg_bin <- nimbleCode({
# Observation model
for(i in 1:n){# n is never defined before, it will be a constant
y[i] ~ dnbinom(prob, theta)
}
# PRIORS
prob ~ dunif(0, 1)
theta ~ dexp(0.1)
})
```
Note that in this code, nothing distinguishes observed data from unknown parameters (or latent variables).
The order of the lines does not matter, as everything will be compiled afterwards.
## Defining the nimble model
Now that the code exists, we define the model. This is where data and constants are provided.
Typically, data are quantities that are treated as realizations of random variables in the code, while constants are not.
```{r}
#| label: model_neg_bin
model_neg_bin <- nimbleModel(code = code_neg_bin,
name = "Negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1))
```
Note that `nimbleModel` points out that we did not provide initial values (which would typically be starting points for MCMC sampling algorithms).
We will do it in the sampling step.
## Basic MCMC sampling
A direct way to proceed is to use the `nimbleMCMC` function, which provides basic Metropolis-Hastings-within-Gibbs sampling.
```{r}
#| label: posterior_sampling_neg_bin
#| cache: true
posterior_samples_neg_bin <- nimbleMCMC(model_neg_bin,
inits = list(prob = 0.5, theta = 1),
nchains = 2, # Number of independent chains
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000) # Number of initial iterations discarded
```
## Exploring the results
Now that we have performed MCMC sampling, we can access the results, which come as a list (one element per chain) of matrices with one row per retained sample and one column per monitored parameter.
```{r}
#| label: str_posterior_samples
str(posterior_samples_neg_bin)
```
To post-process or plot the results, a bit of formatting is needed.
```{r}
#| label: custom_formatted_results
formatted_results <- imap_dfr(posterior_samples_neg_bin,
function(x, nm){
as.data.frame(x) %>%
rowid_to_column(var = "Iteration") %>%
mutate(Chain = str_remove(nm, "chain"))
}) %>%
pivot_longer(cols = -c("Iteration", "Chain"),
names_to = "Parameter",
values_to = "value")
```
We can then draw the usual trace plots.
```{r}
#| label: plot
ggplot(formatted_results) +
aes(x = Iteration,
y = value, color = Chain) +
facet_wrap(~Parameter, scales = "free") +
geom_line() +
labs(x = "Sample ID", y = "Parameter value", color = "")
```
### Package for automatic formatting of results
For `ggplot` users, the `ggmcmc` package provides useful tools for formatting and plotting MCMC outputs in `R` (it is not specific to `nimble`).
This package works with any `coda` object, a long-standing format for MCMC outputs in `R`.
We can ask, at sampling time, for the outputs to be returned in the `coda` format.
```{r}
#| label: posterior_samples_coda
#| cache: true
posterior_samples_neg_bin <- nimbleMCMC(model_neg_bin,
nchains = 2,
niter = 10000,
thin = 10,
nburnin = 1000,
samplesAsCodaMCMC = TRUE)
```
We can see that this modifies the type of output:
```{r}
#| label: str_posterior_samples_coda
str(posterior_samples_neg_bin)
```
Now, we can use the `ggs` function, which performs the post-processing that we did manually above.
```{r}
#| label: coda_formatted_results
formatted_results <- ggs(posterior_samples_neg_bin)
formatted_results # same as above
```
Then, everything works as before!
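For instance, the trace plot above can be reproduced directly from the `ggs` output (note that `Chain` is stored as an integer, hence the `factor()`):
```{r}
#| label: plot_from_ggs
ggplot(formatted_results) +
  aes(x = Iteration, y = value, color = factor(Chain)) +
  facet_wrap(~Parameter, scales = "free") +
  geom_line() +
  labs(x = "Sample ID", y = "Parameter value", color = "")
```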
## Defining a `nimbleFunction`
What makes `nimble` popular is its suitability for statistical programming.
As your specific model will almost certainly require specific functions, we cannot expect to find all the necessary tools among the built-in functions.
However, we can define new functions using a syntax that is quite similar to `R`.
### Alternative parameterization of the negative binomial
Suppose now we want to perform negative binomial regression.
In this context, we model the expectation of the response variable (typically through a link function and covariates).
Here, denoting $\mu = \mathbb{E}\left[Y_i\right]$ (common to all $1\leq i \leq n$), we assume the following prior:
$$
\ln \mu \sim \mathcal{N}\left(0, 1\right)\,.
$$
Sadly, `nimble` does not provide an implementation of the negative binomial distribution parameterized by $(\mu, \theta)$.
However, we know that:
$$
\mu = \theta \times \frac{1 - p}{p}\,,
$$
or, equivalently, that:
$$
p = \frac{\theta}{\theta + \mu}
$$
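As a quick numerical check, with the simulation values used above ($p = 0.4$, $\theta = 12$), this gives $\mu = 12 \times 0.6 / 0.4 = 18$; these are the values used in the call to `get_p_from_mu()` just below.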
```{r}
#| label: get_p_from_mu
get_p_from_mu <- nimbleFunction(
run = function(mu = double(0),
theta = double(0)) { # type declarations
returnType(double(0)) # return type declaration
output <- theta / (theta + mu)
return(output)
})
get_p_from_mu(18, 12) # Works as a usual R function
```
```{r}
#| label: code_neg_bin_alternatif
#| cache: true
code_alternatif <- nimbleCode({
# Observation model
for(i in 1:n){# n is never defined before, it will be a constant
y[i] ~ dnbinom(prob, theta)
}
# Alternative vectorized formulation
# y[1:n] ~ dnbinom(prob, theta)
# PRIORS
log_mu ~ dnorm(0, 1)
theta ~ dexp(0.1)
# Deterministic quantities
mu <- exp(log_mu)
prob <- get_p_from_mu(mu = mu, theta = theta)
})
model_alternatif <- nimbleModel(code = code_alternatif,
name = "Alternative negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1),
inits = list(log_mu = 0.5, theta = 1))
posterior_samples_alternatif <- nimbleMCMC(model_alternatif,
nchains = 2,
niter = 10000,
thin = 10,
nburnin = 1000,
monitors = c("prob", "theta"),
samplesAsCodaMCMC = TRUE)
```
## Defining a new distribution
An alternative to calling `get_p_from_mu` inside the model code is to define a new distribution directly.
```{r}
#| label: dmynegbin
dmynegbin <- nimbleFunction(
run = function(x = double(0),
mu = double(0),
theta = double(0),
log = integer(0, default = 0)) {
returnType(double(0))
prob <- get_p_from_mu(mu, theta)
output <- dnbinom(x, size = theta, prob = prob, log = log)
return(output)
})
registerDistributions(list(
dmynegbin = list(BUGSdist = "dmynegbin(mu, theta)",
discrete = TRUE, pqAvail = FALSE)
))
```
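Since uncompiled `nimbleFunction`s can be called from `R`, a quick sanity check (my addition, not in the original code) is to compare the new density with `dnbinom` under the $(\mu, \theta) \to (p, \theta)$ reparameterization:
```{r}
#| label: check_dmynegbin
# Both calls should return the same density value
dmynegbin(5, mu = 18, theta = 12)
dnbinom(5, size = 12, prob = get_p_from_mu(mu = 18, theta = 12))
```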
```{r}
code_with_my_dist <- nimbleCode({
# Observation model
for(i in 1:n){# n is never defined before, it will be a constant
y[i] ~ dmynegbin(mu, theta) # my distribution
}
# PRIORS
log_mu ~ dnorm(0, 1)
mu <- exp(log_mu)
theta ~ dexp(0.1)
})
model_with_my_dist <- nimbleModel(code = code_with_my_dist,
name = "Alternative negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1),
inits = list(log_mu = 0.5, theta = 1))
compileNimble(model_with_my_dist)
posterior_samples_alternatif <- nimbleMCMC(model_with_my_dist,
nchains = 2,
niter = 10000,
thin = 10,
nburnin = 1000,
monitors = c("mu", "theta"),
samplesAsCodaMCMC = TRUE)
```
```{r}
posterior_samples_alternatif %>%
ggs() %>%
ggplot() +
aes(x = Iteration,
y = value, color = factor(Chain)) +
facet_wrap(~Parameter, scales = "free") +
geom_line() +
labs(x = "Iteration", y = "Parameter value", color = "")
```
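As an optional check, the posterior summaries should be roughly consistent with the simulation values ($\mu = 18$, $\theta = 12$):
```{r}
#| label: summary_my_dist
summary(posterior_samples_alternatif)
```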
## Alternative MCMC sampler
One big strength of `nimble` is the variety of samplers available in the package.
## Conjugate priors
First, `nimble` is able to detect conjugate prior/likelihood relationships and to use the corresponding exact Gibbs updates of the posterior [link](https://r-nimble.org/html_manual/cha-mcmc.html#conjugate-gibbs-samplers).
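As a small illustration (a sketch of my own, not part of the original tutorial), `configureMCMC` lets us inspect which sampler is assigned to each node; with a Beta prior on a Bernoulli probability, a conjugate sampler should be detected automatically:
```{r}
#| label: conjugacy_example
conj_code <- nimbleCode({
  p ~ dbeta(1, 1)
  for(i in 1:n) y[i] ~ dbern(p)
})
conj_model <- nimbleModel(conj_code,
                          constants = list(n = 20),
                          data = list(y = rbinom(20, 1, 0.3)))
conj_conf <- configureMCMC(conj_model)
conj_conf$printSamplers() # should report a conjugate (Gibbs) sampler for p
```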
## HMC algorithm
`nimble` provides support for Hamiltonian Monte Carlo (HMC) and computes the derivatives of the likelihood through automatic differentiation (AD). The `nimbleHMC` package implements two versions of No-U-Turn (NUTS) HMC sampling: the original one developed by Hoffman and Gelman ([link](https://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf)) and an updated one with improved adaptation routines and convergence criteria, which matches the HMC sampler of `Stan`.
To allow an algorithm to use AD for a specific model, that model must be created with `buildDerivs = TRUE` (here we also pass `calculate = FALSE` to skip the initial model calculation).
```{r}
#| label: HMC_code
#| cache: true
# Build model with nimble
model_neg_bin_HMC <- nimbleModel(code = code_neg_bin,
name = "Negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1),
inits = list(prob = 0.5, theta = 1),
calculate = FALSE, buildDerivs = TRUE) # This is the line required for running HMC
C_model_neg_bin_HMC <- compileNimble(model_neg_bin_HMC) # Compile the model (required before compiling the HMC algorithm)
# Build the MCMC algorithm which applies HMC sampling
HMC <- buildHMC(C_model_neg_bin_HMC)
# Careful: when the model has random effects,
# HMC requires setting their values in the model before running the algorithm.
# One solution is to simulate them with the model and assign these values to the model.
# See: https://r-nimble.org/html_manual/cha-mcmc.html#subsec:HMC-example
# Here, as the model is simple, there is no need for this and everything is handled within nimble/nimbleHMC
## Then everything is standard in nimble
CHMC <- compileNimble(HMC) # Compile the HMC model/algo
samples <- runMCMC(CHMC, niter = 1000, nburnin = 500) # Short run for illustration
summary(coda::as.mcmc(samples)) # Summary of the estimates
```
And there are plenty of other samplers:
- Particle filters / sequential Monte Carlo and iterated filtering (package `nimbleSMC`)
- Monte Carlo Expectation Maximization (MCEM)
See [link](https://r-nimble.org/html_manual/cha-algos-provided.html#cha-algos-provided)
## The Laplace approximation
`nimble` also implements the Laplace approximation. Be careful though: it is used here to perform maximum likelihood estimation. This is not the same as `INLA` (a fully Bayesian approach); it is closer to `TMB` (or `glmmTMB`), i.e. maximum likelihood estimation through the Laplace approximation and automatic differentiation.
```{r}
#| label: Laplace_code
#| cache: true
# We need the derivatives to build the Laplace algorithm
# so we take the object model_neg_bin_HMC built previously
model_laplace <- buildLaplace(model_neg_bin_HMC)
Cmodel_laplace <- compileNimble(model_laplace)
# Get the Laplace approximation for one set of parameter values.
Cmodel_laplace$calcLaplace(c(0.5,0.5))
# Get the corresponding gradient.
Cmodel_laplace$gr_Laplace(c(0.5,0.5))
# Search for the (approximate) MLE
MLE <- Cmodel_laplace$findMLE(c(0.5,0.5))
MLE$par
# Get log-likelihood value
MLE$value
# And output summaries
Cmodel_laplace$summary(MLE)
```
N.B.: this example only illustrates the code. The Laplace approximation is relevant only when the model includes random effects (which is not the case here).
For a full example, see [link](https://r-nimble.org/html_manual/cha-AD.html#sec:AD-laplace)
## Comparing MCMC algorithms
One can compare several algorithms with the `compareMCMCs` package. It is possible to compare algorithms internal to `nimble` with those of `jags` (or even `Stan`). Below is an example comparing two `nimble` configurations: the default samplers and slice samplers.
```{r}
#| label: compareMCMC_code
#| cache: true
# This model information is shared by all compared MCMC configurations
modelInfo <- list(
code = code_neg_bin,
constants = list(n = length(data_ex1)),
data = list(y = data_ex1),
inits = list(prob = 0.5, theta = 1)
)
# Here is a custom MCMC configuration function for nimble
configure_nimble_slice <- function(model) {
configureMCMC(model, onlySlice = TRUE)
}
# Here is the call to compareMCMCs
res <- compareMCMCs(modelInfo,
MCMCs = c('nimble', # nimble with default samplers
'nimble_slice' # nimble with slice samplers
),
nimbleMCMCdefs =
list(nimble_slice = 'configure_nimble_slice'),
MCMCcontrol = list(inits = list(prob = 0.5, theta = 1),
niter = 10000,
burnin = 1000))
make_MCMC_comparison_pages(res, modelName = 'code_neg_bin', dir = "/tmp/",
control = list(res = 75))
```
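Besides the generated HTML pages, the metrics can also be inspected directly in `R`; a sketch below, assuming the `combineMetrics()` helper of `compareMCMCs`:
```{r}
#| label: compareMCMC_metrics
#| eval: false
combineMetrics(res) # efficiency metrics (e.g. effective sample size per unit time) for each MCMC
```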
# Another example: multivariate normal with zero inflated component
We consider a multivariate Gaussian model with zero inflation, where the zero-inflation probability can depend on the variable. The vector of observations $Y_i$ lives in $\mathbb{R}^p$, and its distribution is defined conditionally on a (multivariate) Bernoulli variable $W_i \in \{0,1\}^p$ and a multivariate Gaussian variable $Z_i\in\mathbb{R}^p$:
\begin{equation}
\begin{array}{rcl}
Y_{ij} \,\vert\, Z_i, W_{i} & = & W_{ij} \delta_0 + (1 - W_{ij}) Z_{ij}\\
W_{i} & \sim & \otimes_j \mathcal{B}(\pi_j) \\
Z_i & \sim & \mathcal{N}\left(\mu, \Omega^{-1} \right) \\
\end{array}
\end{equation}
The parameters to estimate are $\theta = \{\mu, \Omega, \pi\}$.
## Data generation
We consider a simple setting in dimension $p=5$, with a Toeplitz-like covariance matrix.
```{r}
#| label: data-settings
N <- 100
p <- 5
d <- 1:p
Dsqrt <- diag(sqrt(d))
Sigma <- Dsqrt %*% toeplitz(0.75^(0:(p-1))) %*% Dsqrt
Omega <- solve(Sigma)
mu <- 5 + 1:p
pi <- c(0.25, 0, 0.8, 0.1, .5)
```
Here are some data (100 points):
```{r, warnings=FALSE, message=FALSE}
#| label: data-generation
W <- t(replicate(N, rbinom(p, prob = pi, size = 1)))
Y <- (1 - W) * rmvnorm(N, mu, Sigma)
ggplot(data.frame(y = c(Y))) + aes(x=y) + geom_histogram()
```
## Auxiliary functions
We need some auxiliary `nimble` functions to handle the density and generation of the random binomial vector $W$:
```{r}
#| label: vector-binomial
dbinom_vector <- nimbleFunction(
run = function( x = double(1),
size = double(1),
prob = double(1),
log = integer(0, default = 0)
) {
returnType(double(0))
logProb <- sum(dbinom(x, prob = prob, size = size, log = TRUE))
if(log) return(logProb) else return(exp(logProb))
})
rbinom_vector <- nimbleFunction(
run = function( n = integer(0, default = 1),
size = double(1),
prob = double(1)
) {
returnType(double(1))
return(rbinom(length(size), prob = prob, size = size))
})
```
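These `nimbleFunction`s can be called directly in `R` before compilation, so a quick sanity check (my addition, not in the original) is to compare the vectorized density with the product of univariate Bernoulli densities:
```{r}
#| label: check_dbinom_vector
x_test <- c(1, 0, 1, 0, 1)
dbinom_vector(x = x_test, size = rep(1, p), prob = pi)
prod(dbinom(x_test, size = 1, prob = pi)) # should give the same value
```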
## Nimble code and model for ZI-normal: V1
Rather than defining a probability density function for this model (which is in fact a bit complicated...), we adopt a generative approach:
```{r}
#| label: ZI-normal-code
ZInormal_code <- nimbleCode({
for (j in 1:p) {
mean[j] ~ dnorm(0,1)
}
for (j in 1:p) {
zeroProb[j] ~ dunif(0,1)
}
prec[1:p,1:p] ~ dwish(Ip[1:p,1:p], p)
for (i in 1:N) {
w[i, 1:p] ~ dbinom_vector(onep[1:p], zeroProb[1:p])
z[i, 1:p] ~ dmnorm(mean[1:p], prec[1:p,1:p])
ytilde[i, 1:p] <- (1 - w[i,1:p]) * z[i,1:p]
## P. Barbillon/M.-P. Étienne: the "trick at zero"
## (the observed y is linked to the deterministic ytilde through a
## near-degenerate normal with very high precision prec_inf)
y[i, 1:p] ~ dmnorm(ytilde[i, 1:p], prec_inf[1:p,1:p])
}
})
```
We can now define the `nimble` model for the ZI-normal. We give some sensible initial values for the parameters and the latent variables, define some constants and provide the data:
```{r}
#| label: ZInormal-model
ZInormal_model <- nimbleModel(
ZInormal_code,
constants =
list(N = N, p = p, Ip = diag(1,p,p),
onep = rep(1,p), prec_inf = diag(1e5,p,p)),
data = list(y = Y, w = W),
inits = list(mean = rep(5,p), prec = diag(1,p,p), zeroProb=rep(0.5,p), z = Y))
```
## MCMC estimation
Let us run a simple 2-chain MCMC estimation:
```{r}
#| label: ZI-normal-MCMC
#| cache: true
my_MCMC <- nimbleMCMC(
ZInormal_model,
monitors = c("mean", "prec", "zeroProb"),
nchains = 2,
niter = 1000,
samplesAsCodaMCMC = TRUE,
nburnin=100)
```
```{r echo=FALSE}
#| label: posterior-mean
#| fig-cap: 'Estimation of the mean $\mu$'
ggs(my_MCMC) %>% filter(stringr::str_detect(Parameter, "mean")) %>%
ggplot() + aes(x = Iteration, color = factor(Chain), y = value) + facet_wrap(~Parameter) + geom_line()
```
```{r}
#| label: true-mean
print(mu)
```
```{r echo=FALSE}
#| label: posterior-zeroProb
#| fig-cap: 'Estimation of the zero inflation probabilities $\pi$'
ggs(my_MCMC) %>% filter(stringr::str_detect(Parameter, "zeroProb")) %>%
ggplot() + aes(x = Iteration, color = factor(Chain), y = value) + facet_wrap(~Parameter) + geom_line()
```
```{r}
#| label: true-pi
print(pi)
```
```{r echo=FALSE}
#| label: posterior-prec
#| fig-cap: 'Estimation of the precision matrix $\Omega$'
ggs(my_MCMC) %>% filter(stringr::str_detect(Parameter, "prec")) %>%
ggplot() + aes(x = Iteration, color = factor(Chain), y = value) + facet_wrap(~Parameter) + geom_line()
```
```{r}
#| label: true-precision
print(round(Omega,3))
```
# Zero inflated multivariate normal revisited
All the above code uses a workaround to avoid defining a new distribution in `nimble`, namely the zero-inflated (ZI) multivariate normal. Let us now define it properly.
Let $Y$ be a random vector.
We denote by $\mathbf{i}_*$ the set of indices for which $Y$ is nonzero, and by $\mathbf{i}_0$ the set of indices for which $Y$ is 0.
For the zero-inflated normal distribution, the p.d.f. is given by:
$$
p(y \vert \mu, \Sigma, \pi) = \varphi(y_{\mathbf{i}_*}\vert \mu_{\mathbf{i}_*}, \Sigma_{\mathbf{i}_*;\mathbf{i}_*})\prod_{j\in \mathbf{i_0}}\pi_j \prod_{k\in \mathbf{i}_*}(1 - \pi_k)\,,
$$
where $\varphi(y_{\mathbf{i}_*}\vert \mu_{\mathbf{i}_*}, \Sigma_{\mathbf{i}_*;\mathbf{i}_*})$ is the p.d.f. of a multivariate normal distribution restricted to the indices where $y$ is nonzero (and to the corresponding parameters).
This distribution can be coded as a `nimbleFunction`.
Two points are important here: the first argument must correspond to the value at which the density is evaluated, and the `log` argument is mandatory.
```{r}
#| label: dZInormal
dZInormal <- nimbleFunction(
run = function(x = double(1),
prob = double(1),
mu = double(1),
sigma = double(2),
log = integer(0, default = 0)){
returnType(double(0))
non_nul_indexes <- which(x!=0)
nul_indexes <- which(x == 0)
p_term <- sum(log(prob[nul_indexes])) +
sum(log(1 - prob[non_nul_indexes]))
mu_term <- 0
if(length(non_nul_indexes) > 0){
chol_mat <- chol(sigma[non_nul_indexes, non_nul_indexes])
restricted_x <- x[non_nul_indexes]
restricted_mu <- mu[non_nul_indexes]
mu_term <- dmnorm_chol(restricted_x,
restricted_mu,
chol_mat, prec_param = FALSE, log = TRUE)
}
log_output <- p_term + mu_term
if(log){
return(log_output)
}
else{
return(exp(log_output))
}
}
)
```
From experience, it is better to register this distribution, to avoid any confusion about its parameters and about the nature and dimension of its output.
```{r}
registerDistributions(list(
dZInormal = list(BUGSdist = "dZInormal(prob, mu, sigma)", # How to call in nimble
discrete = FALSE, # Distribution is not discrete
pqAvail = FALSE, # CDF and quantile function are not available
types = c('value = double(1)', # The random variable is a vector
'prob = double(1)', # a vector
'mu = double(1)', # vector
'sigma = double(2)')) # double(2) is a matrix
))
```
Note that `nimble` warns us that we did not provide any way of simulating from this distribution. This is not a problem for running a standard MCMC.
Moreover, providing the CDF and quantile functions could allow some gain in efficiency (at least, that is what the help page says).
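If simulation from the distribution were needed (e.g. for posterior predictive checks), a companion `rZInormal` function could be defined before registering. The sketch below is my own addition (not from the original tutorial) and assumes the usual `nimble` convention that the simulation function takes the number of draws `n` as its first argument:
```{r}
#| label: rZInormal_sketch
#| eval: false
rZInormal <- nimbleFunction(
  run = function(n = integer(0),
                 prob = double(1),
                 mu = double(1),
                 sigma = double(2)){
    returnType(double(1))
    p <- length(prob)
    # Zero-inflation indicators (1 = structural zero), one per coordinate
    w <- rbinom(p, size = rep(1, p), prob = prob)
    # Gaussian component, simulated through its Cholesky factor
    z <- rmnorm_chol(1, mu, chol(sigma), prec_param = FALSE)
    return((1 - w) * z)
  })
```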
The model code is then straightforward:
```{r}
my_code <- nimbleCode({
for(j in 1:p){
prob[j] ~ dunif(0, 1)
mu[j] ~ dnorm(0, 1)
}
sigma[1:p, 1:p] ~ dwish(Ip[1:p, 1:p], p)
for(i in 1:n){
# I put my custom distribution
Y[i, 1:p] ~ dZInormal(prob[1:p], mu[1:p], sigma[1:p, 1:p])
}
})
```
And now, let's run it!
```{r}
#| label: fit-ZI-normal
#| cache: true
# Generating data
set.seed(123)
n_obs <- 1000
n_species <- 3
# Values
U <- mvtnorm::rmvnorm(n = n_obs,
mean = 0:(n_species - 1),
sigma = diag(1, n_species))
# Mask (matrix of zeros and ones)
Z <- rbinom(n = n_obs * n_species, size = 1, prob = .8) %>%
matrix(nrow = n_obs)
# Observations
Y <- round(U * Z, 9)
my_model <- nimbleModel(code = my_code,
constants = list(p = n_species,
n = n_obs,
Ip = diag(1, n_species)),
data = list(Y = Y),
inits = list(mu = rep(0, n_species),
prob = rep(0.5, n_species),
sigma = diag(1, n_species)))
results <- nimbleMCMC(my_model,
samplesAsCodaMCMC = TRUE,
nchains = 2, niter = 10000,
nburnin = 1000, thin = 10)
ggs(results) %>%
ggplot(aes(x = Iteration, y = value, color = factor(Chain))) +
facet_wrap(~Parameter, scales = "free") +
geom_line()
```
# Parallelisation with `nimble`
Objectives:

- Parallelise the computation of several chains over different cores of the machine.
- Use a different seed for each chain.
- Compare the computation times between:
  - sequential runs, done in 2 ways (`nimbleModel` + `compileNimble` + `buildMCMC` + `runMCMC`, or `nimbleMCMC` where everything is integrated);
  - parallelisation of the `run_MCMC_allcode` function (`nimbleModel` + `compileNimble` + `buildMCMC` + `runMCMC`) with `parallel`;
  - parallelisation of the `run_MCMC_allcode` function (`nimbleModel` + `compileNimble` + `buildMCMC` + `runMCMC`) with `future`.
```{r}
#| echo: true
#| eval: false
set.seed(123) #For reproducibility
data_ex1 <- rnbinom(n = 100, prob = 0.4, size = 12)
code_neg_bin <- nimbleCode({
# Observation model
for(i in 1:n){# n is never defined before, it will be a constant
y[i] ~ dnbinom(prob, theta)
}
# PRIORS
prob ~ dunif(0, 1)
theta ~ dexp(0.1)
})
time0=Sys.time()
seed=c(123,234)
model_neg_bin <- nimbleModel(code = code_neg_bin,
name = "Negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1),
inits = list(prob = 0.5, theta = 1))
CmyModel <- compileNimble(model_neg_bin)
myMCMC <- buildMCMC(CmyModel) #, monitors = monitors)
CmyMCMC <- compileNimble(myMCMC)
results_1 <- runMCMC(CmyMCMC, setSeed = seed,
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000,
nchains=2)
print(Sys.time()-time0)
#Time difference of 29.35223 secs
```
Or do the compile, build and run steps all at once:
```{r}
#| echo: true
#| eval: false
time0=Sys.time()
results_1bis <- nimbleMCMC(model_neg_bin,
inits = list(prob = 0.5, theta = 1),
nchains = 2, # Number of independent chains
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000) # Number of initial iterations discarded
print(Sys.time()-time0)
# Time difference of 18.00453 secs
```
Trace plots of the chains (optional):
```{r}
#| echo: true
#| eval: false
formatted_results <- imap_dfr(results_1,
function(x, nm){
as.data.frame(x) %>%
rowid_to_column(var = "Iteration") %>%
mutate(Chain = str_remove(nm, "chain"))
}) %>%
pivot_longer(cols = -c("Iteration", "Chain"),
names_to = "Parameter",
values_to = "value")
ggplot(formatted_results) +
aes(x = Iteration,
y = value, color = Chain) +
facet_wrap(~Parameter, scales = "free") +
geom_line() +
labs(x = "Sample ID", y = "Parameter value", color = "")
formatted_results <- imap_dfr(results_1bis,
function(x, nm){
as.data.frame(x) %>%
rowid_to_column(var = "Iteration") %>%
mutate(Chain = str_remove(nm, "chain"))
}) %>%
pivot_longer(cols = -c("Iteration", "Chain"),
names_to = "Parameter",
values_to = "value")
ggplot(formatted_results) +
aes(x = Iteration,
y = value, color = Chain) +
facet_wrap(~Parameter, scales = "free") +
geom_line() +
labs(x = "Sample ID", y = "Parameter value", color = "")
```
## Computation time for 2 chains run sequentially with the 2 seeds
```{r}
#| echo: true
#| eval: false
time0=Sys.time()
posterior_samples_neg_bin1 <- nimbleMCMC(model_neg_bin,
inits = list(prob = 0.5, theta = 1),
nchains = 1, # Number of independent chains
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000,
setSeed=123) # Seed for this chain
posterior_samples_neg_bin2 <- nimbleMCMC(model_neg_bin,
inits = list(prob = 0.5, theta = 1),
nchains = 1, # Number of independent chains
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000,
setSeed=234) # Seed for this chain
print(Sys.time()-time0)
# Time difference of 35.44853 secs
```
## Parallelisation test following the tutorial from the Nimble website
```{r}
#| echo: true
#| eval: false
#https://r-nimble.org/nimbleExamples/parallelizing_NIMBLE.html
#library(parallel)
model_neg_bin <- nimbleModel(code = code_neg_bin,
name = "Negative binomial",
constants = list(n = length(data_ex1)),
data = list(y = data_ex1))
posterior_samples_neg_bin <- nimbleMCMC(model_neg_bin,
inits = list(prob = 0.5, theta = 1),
nchains = 2, # Number of independent chains
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000) # Number of initial iterations discarded
workers <- makeCluster(3)
plan(multisession)
myData <- data_ex1
run_MCMC_allcode <- function(seed=seed, data=myData, code=code){ #, useWAIC = F) {
library(nimble)
model_neg_bin <- nimbleModel(code = code,
name = "Negative binomial",
constants = list(n = length(data)),
data = list(y = data),
inits = list(prob = 0.5, theta = 1))
CmyModel <- compileNimble(model_neg_bin)
#if(useWAIC)
# monitors <- myModel$getParents(model_neg_bin$getNodeNames(dataOnly = TRUE), stochOnly = TRUE)
## One may also wish to add additional monitors
myMCMC <- buildMCMC(CmyModel) #, monitors = monitors)
CmyMCMC <- compileNimble(myMCMC)
results <- runMCMC(CmyMCMC, setSeed = seed,
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000)
return(results)
}
time0=Sys.time()
chain_output <- parLapply(cl = workers, X = 1:3,
fun = run_MCMC_allcode,
data = myData, code = code_neg_bin)#,
#useWAIC = F)
print(Sys.time()-time0)
# Time difference of 27.86264 secs
# It's good practice to close the cluster when you're done with it.
stopCluster(workers)
```
## The best for last: a parallelisation test with `future`, based on Aymeric Stamm's tutorial
```{r}
#| echo: true
#| eval: false
#library(future)
#library(furrr)
plan(multisession,workers=2)
data_ex1 <- rnbinom(n = 100, prob = 0.4, size = 12)
myData <- data_ex1
seed=c(123,234)
myCode <- nimbleCode({
# Observation model
for(i in 1:n){
y[i] ~ dnbinom(prob, theta)
}
# PRIORS
prob ~ dunif(0, 1)
theta ~ dexp(0.1)
})
run_MCMC_allcode <- function(seed=seed, data=myData, code=myCode){ #, useWAIC = F) {
library(nimble)
model_neg_bin <- nimbleModel(code = code,
name = "Negative binomial",
constants = list(n = length(data)),
data = list(y = data),
inits = list(prob = 0.5, theta = 1))
CmyModel <- compileNimble(model_neg_bin)
myMCMC <- buildMCMC(CmyModel)
CmyMCMC <- compileNimble(myMCMC)
results <- runMCMC(CmyMCMC, setSeed = seed,
niter = 10000, # Number of it. per chain
thin = 10, # Thinning
nburnin = 1000)
return(results)
}
# computation time when parallelising the 2 chains with different seeds
time0=Sys.time()
fMCMC <- future_map(.x=seed, .f= run_MCMC_allcode,.options=furrr_options(seed=TRUE))
print(Sys.time()-time0)
#Time difference of 23.59011 secs
```
## And the winner is... ?
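For reference, the wall-clock times reported in the chunks above (machine-dependent, so only indicative):

- sequential, 2 chains, step by step (`nimbleModel` + `compileNimble` + `buildMCMC` + `runMCMC`): 29.35 s;
- sequential, 2 chains, all-in-one `nimbleMCMC`: 18.00 s;
- sequential, two 1-chain `nimbleMCMC` calls with different seeds: 35.45 s;
- `parallel::parLapply`, 3 chains on a cluster of 3 workers: 27.86 s;
- `future`/`furrr::future_map`, 2 chains on 2 workers: 23.59 s.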