Add varying slopes on the predictor to the following model:
answer:
Think up a context in which varying intercepts will be positively correlated with varying slopes. Provide a mechanistic explanation for the correlation.
Competitive plant growth, with presence/absence of shade as the treatment. Large plants will outcompete smaller plants and get even bigger; small plants won't be able to compete and will stay small. So plants with large intercepts (initial size) also tend to have large slopes (growth rate), producing a positive correlation between the varying effects.
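A minimal sketch of that story, reusing the chapter's simulation machinery with a positive rho (all of the numbers here are invented for illustration):
library(MASS)
sigma_size <- 1       # std dev in initial sizes (intercepts)
sigma_grow <- 0.5     # std dev in growth rates (slopes)
rho_pos <- 0.7        # positive correlation: big plants grow even faster
sigmas_p <- c(sigma_size,sigma_grow)
Rho_p <- matrix( c(1,rho_pos,rho_pos,1) , nrow=2 )
Sigma_p <- diag(sigmas_p) %*% Rho_p %*% diag(sigmas_p)
plants <- mvrnorm( 50 , c(10,2) , Sigma_p )  # 50 plants: mean size 10, mean growth 2
cor( plants[,1] , plants[,2] )               # recovers roughly +0.7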
When is it possible for a varying slopes model to have fewer effective parameters (as estimated by WAIC or DIC) than the corresponding model with fixed (unpooled) slopes? Explain.
If there is a strong correlation between intercepts and slopes, then effectively fewer parameters need to be estimated, because once you know a cluster's intercept you largely know its slope.
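One way to check this on fitted models (a sketch with hypothetical fit names m_varying and m_unpooled; assumes rethinking's WAIC() output, which reports the penalty term as pWAIC):
# pWAIC is the effective number of parameters; strong pooling plus a
# strong intercept-slope correlation can push it below the raw count
compare( m_varying , m_unpooled )   # inspect the pWAIC column
WAIC( m_varying )$penalty           # the same penalty for a single model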
Repeat the café robot simulation from the beginning of the chapter. This time, set rho to zero, so that there is no correlation between intercepts and slopes. How does the posterior distribution of the correlation reflect this change in the underlying simulation?
Simulate some cafes
## R code 14.1
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 # std dev in intercepts
sigma_b <- 0.5 # std dev in slopes
rho <- (-0.7) # correlation between intercepts and slopes
## R code 14.2
Mu <- c( a , b )
## R code 14.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix
# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
Sigma
## [,1] [,2]
## [1,] 1.00 -0.35
## [2,] -0.35 0.25
## R code 14.6
N_cafes <- 20
## R code 14.7
library(MASS)
set.seed(5) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )
## R code 14.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]
Now simulate sampling:
## R code 14.10
set.seed(22)
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
## R code 14.12
m14.1 <- ulam(
alist(
wait ~ normal( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ multi_normal( c(a,b) , Rho , sigma_cafe ),
a ~ normal(5,2),
b ~ normal(-1,0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
) , data=d , chains=4 , cores=4, log_lik = TRUE )
Simulate some cafes
## R code 14.1
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 # std dev in intercepts
sigma_b <- 0.5 # std dev in slopes
rho <- (0) # correlation between intercepts and slopes
## R code 14.2
Mu <- c( a , b )
## R code 14.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix
# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
Sigma
## [,1] [,2]
## [1,] 1 0.00
## [2,] 0 0.25
## R code 14.6
N_cafes <- 20
## R code 14.7
set.seed(5) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )
## R code 14.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]
Now simulate sampling:
## R code 14.10
set.seed(22)
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d_zero_rho <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
## R code 14.12
m14.1alt <- ulam(
alist(
wait ~ normal( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ multi_normal( c(a,b) , Rho , sigma_cafe ),
a ~ normal(5,2),
b ~ normal(-1,0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
) , data=d_zero_rho , chains=4 , cores=4, log_lik = TRUE )
precis(m14.1, depth=3, pars=c("a", "b", "Rho"))
## mean sd 5.5% 94.5% n_eff Rhat
## a 3.6540560 2.216840e-01 3.3042476 4.0038041 2534.471 0.9985916
## b -1.1415404 1.437323e-01 -1.3683510 -0.9123456 2473.478 0.9995964
## Rho[1,1] 1.0000000 0.000000e+00 1.0000000 1.0000000 NaN NaN
## Rho[1,2] -0.5020623 1.802186e-01 -0.7533067 -0.1868229 2463.166 0.9989320
## Rho[2,1] -0.5020623 1.802186e-01 -0.7533067 -0.1868229 2463.166 0.9989320
## Rho[2,2] 1.0000000 8.220706e-17 1.0000000 1.0000000 2112.773 0.9979980
precis(m14.1alt, depth = 3, pars=c("a", "b", "Rho"))
## mean sd 5.5% 94.5% n_eff Rhat
## a 3.71558153 2.258057e-01 3.3675259 4.0741742 2267.265 1.001063
## b -1.10250653 1.473425e-01 -1.3329514 -0.8739319 2104.294 1.000082
## Rho[1,1] 1.00000000 0.000000e+00 1.0000000 1.0000000 NaN NaN
## Rho[1,2] -0.05914906 2.319446e-01 -0.4208165 0.3257339 1887.576 1.001780
## Rho[2,1] -0.05914906 2.319446e-01 -0.4208165 0.3257339 1887.576 1.001780
## Rho[2,2] 1.00000000 9.527034e-17 1.0000000 1.0000000 2044.376 0.997998
The correlation estimate is now approximately zero, as expected.
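To see the change directly, the two posteriors for the correlation can be overlaid (a quick sketch, assuming both fits are still in memory):
post_neg <- extract.samples(m14.1)      # data simulated with rho = -0.7
post_zero <- extract.samples(m14.1alt)  # data simulated with rho = 0
dens( post_neg$Rho[,1,2] , xlab="correlation" , xlim=c(-1,1) )
dens( post_zero$Rho[,1,2] , add=TRUE , lty=2 )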
Fit this multilevel model (intercepts and slopes pooled separately, with no correlation between them) to the simulated café data:
## R code 14.12
m14.M2 <- ulam(
alist(
wait ~ normal( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
a_cafe[cafe] ~ normal(a_bar, a_sigma),
b_cafe[cafe] ~ normal(b_bar, b_sigma),
a_bar ~ normal(5,2),
b_bar ~ normal(-1,0.5),
a_sigma ~ exponential(1),
b_sigma ~ exponential(1),
sigma ~ exponential(1)
) , data=d , chains=4 , cores=4, log_lik=TRUE )
compare(m14.1, m14.M2)
## WAIC SE dWAIC dSE pWAIC weight
## m14.1 305.2609 17.78987 0.0000000 NA 33.00102 0.5478858
## m14.M2 305.6451 17.99910 0.3842642 2.137314 32.27162 0.4521142
Pretty much the same. I had hoped this would make the case for pooling with correlation...
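Where the two fits do differ is in how they shrink the per-cafe effects; a quick sketch comparing posterior means (assuming both fits are in memory):
post_cor <- extract.samples(m14.1)   # correlated intercepts and slopes
post_ind <- extract.samples(m14.M2)  # independent intercepts and slopes
a_cor <- apply( post_cor$a_cafe , 2 , mean )
a_ind <- apply( post_ind$a_cafe , 2 , mean )
b_cor <- apply( post_cor$b_cafe , 2 , mean )
b_ind <- apply( post_ind$b_cafe , 2 , mean )
plot( a_ind , a_cor , xlab="independent pooling" , ylab="correlated pooling" )
points( b_ind , b_cor , pch=16 )  # slopes as filled points
abline( a=0 , b=1 , lty=2 )       # departures from this line show the correlation's pull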
Simulate some cafes
## R code 14.1
a <- 3.5 # average morning wait time
b <- (-1) # average difference afternoon wait time
sigma_a <- 1 # std dev in intercepts
sigma_b <- 0.5 # std dev in slopes
rho <- (-0.7) # correlation between intercepts and slopes
## R code 14.2
Mu <- c( a , b )
## R code 14.3
cov_ab <- sigma_a*sigma_b*rho
Sigma <- matrix( c(sigma_a^2,cov_ab,cov_ab,sigma_b^2) , ncol=2 )
## R code 14.4
matrix( c(1,2,3,4) , nrow=2 , ncol=2 )
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
## R code 14.5
sigmas <- c(sigma_a,sigma_b) # standard deviations
Rho <- matrix( c(1,rho,rho,1) , nrow=2 ) # correlation matrix
# now matrix multiply to get covariance matrix
Sigma <- diag(sigmas) %*% Rho %*% diag(sigmas)
Sigma
## [,1] [,2]
## [1,] 1.00 -0.35
## [2,] -0.35 0.25
## R code 14.6
N_cafes <- 20
## R code 14.7
library(MASS)
set.seed(5) # used to replicate example
vary_effects <- mvrnorm( N_cafes , Mu , Sigma )
## R code 14.8
a_cafe <- vary_effects[,1]
b_cafe <- vary_effects[,2]
## R code 14.9
plot( a_cafe , b_cafe , col=rangi2 ,
xlab="intercepts (a_cafe)" , ylab="slopes (b_cafe)" )
# overlay population distribution
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
lines(ellipse(Sigma,centre=Mu,level=l),col=col.alpha("black",0.2))
Now simulate sampling:
## R code 14.10
set.seed(22)
N_visits <- 10
afternoon <- rep(0:1,N_visits*N_cafes/2)
cafe_id <- rep( 1:N_cafes , each=N_visits )
mu <- a_cafe[cafe_id] + b_cafe[cafe_id]*afternoon
sigma <- 0.5 # std dev within cafes
wait <- rnorm( N_visits*N_cafes , mu , sigma )
d <- data.frame( cafe=cafe_id , afternoon=afternoon , wait=wait )
head(d)
## cafe afternoon wait
## 1 1 0 3.9678929
## 2 1 1 3.8571978
## 3 1 0 4.7278755
## 4 1 1 2.7610133
## 5 1 0 4.1194827
## 6 1 1 3.5436522
## R code 14.11
R <- rlkjcorr( 1e4 , K=2 , eta=2 )
dens( R[,1,2] , xlab="correlation" )
## R code 14.12
m14.1 <- ulam(
alist(
wait ~ normal( mu , sigma ),
mu <- a_cafe[cafe] + b_cafe[cafe]*afternoon,
c(a_cafe,b_cafe)[cafe] ~ multi_normal( c(a,b) , Rho , sigma_cafe ),
a ~ normal(5,2),
b ~ normal(-1,0.5),
sigma_cafe ~ exponential(1),
sigma ~ exponential(1),
Rho ~ lkj_corr(2)
) , data=d , chains=4 , cores=4 )
## R code 14.13
post <- extract.samples(m14.1)
dens( post$Rho[,1,2] )
## R code 14.14
# compute unpooled estimates directly from data
a1 <- sapply( 1:N_cafes ,
function(i) mean(wait[cafe_id==i & afternoon==0]) )
b1 <- sapply( 1:N_cafes ,
function(i) mean(wait[cafe_id==i & afternoon==1]) ) - a1
# extract posterior means of partially pooled estimates
post <- extract.samples(m14.1)
a2 <- apply( post$a_cafe , 2 , mean )
b2 <- apply( post$b_cafe , 2 , mean )
# plot both and connect with lines
plot( a1 , b1 , xlab="intercept" , ylab="slope" ,
pch=16 , col=rangi2 , ylim=c( min(b1)-0.1 , max(b1)+0.1 ) ,
xlim=c( min(a1)-0.1 , max(a1)+0.1 ) )
points( a2 , b2 , pch=1 )
for ( i in 1:N_cafes ) lines( c(a1[i],a2[i]) , c(b1[i],b2[i]) )
## R code 14.15
# compute posterior mean bivariate Gaussian
Mu_est <- c( mean(post$a) , mean(post$b) )
rho_est <- mean( post$Rho[,1,2] )
sa_est <- mean( post$sigma_cafe[,1] )
sb_est <- mean( post$sigma_cafe[,2] )
cov_ab <- sa_est*sb_est*rho_est
Sigma_est <- matrix( c(sa_est^2,cov_ab,cov_ab,sb_est^2) , ncol=2 )
# draw contours
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
lines(ellipse(Sigma_est,centre=Mu_est,level=l),
col=col.alpha("black",0.2))
## R code 14.16
# convert varying effects to waiting times
wait_morning_1 <- (a1)
wait_afternoon_1 <- (a1 + b1)
wait_morning_2 <- (a2)
wait_afternoon_2 <- (a2 + b2)
# plot both and connect with lines
plot( wait_morning_1 , wait_afternoon_1 , xlab="morning wait" ,
ylab="afternoon wait" , pch=16 , col=rangi2 ,
ylim=c( min(wait_afternoon_1)-0.1 , max(wait_afternoon_1)+0.1 ) ,
xlim=c( min(wait_morning_1)-0.1 , max(wait_morning_1)+0.1 ) )
points( wait_morning_2 , wait_afternoon_2 , pch=1 )
for ( i in 1:N_cafes )
lines( c(wait_morning_1[i],wait_morning_2[i]) ,
c(wait_afternoon_1[i],wait_afternoon_2[i]) )
abline( a=0 , b=1 , lty=2 )
## R code 14.17
# now shrinkage distribution by simulation
v <- mvrnorm( 1e4 , Mu_est , Sigma_est )
v[,2] <- v[,1] + v[,2] # calculate afternoon wait
Sigma_est2 <- cov(v)
Mu_est2 <- Mu_est
Mu_est2[2] <- Mu_est[1]+Mu_est[2]
# draw contours
library(ellipse)
for ( l in c(0.1,0.3,0.5,0.8,0.99) )
lines(ellipse(Sigma_est2,centre=Mu_est2,level=l),
col=col.alpha("black",0.5))
## R code 14.18
library(rethinking)
data(chimpanzees)
d <- chimpanzees
d$block_id <- d$block
d$treatment <- 1L + d$prosoc_left + 2L*d$condition
dat <- list(
L = d$pulled_left,
tid = d$treatment,
actor = d$actor,
block_id = as.integer(d$block_id) )
m14.2 <- ulam(
alist(
L ~ binomial(1,p),
logit(p) <- g[tid] + alpha[actor,tid] + beta[block_id,tid],
# adaptive priors
vector[4]:alpha[actor] ~ multi_normal(0,Rho_actor,sigma_actor),
vector[4]:beta[block_id] ~ multi_normal(0,Rho_block,sigma_block),
# fixed priors
g[tid] ~ dnorm(0,1),
sigma_actor ~ dexp(1),
Rho_actor ~ dlkjcorr(4),
sigma_block ~ dexp(1),
Rho_block ~ dlkjcorr(4)
) , data=dat , chains=4 , cores=4 )
## R code 14.19
m14.3 <- ulam(
alist(
L ~ binomial(1,p),
logit(p) <- g[tid] + alpha[actor,tid] + beta[block_id,tid],
# adaptive priors - non-centered
transpars> matrix[actor,4]:alpha <-
compose_noncentered( sigma_actor , L_Rho_actor , z_actor ),
transpars> matrix[block_id,4]:beta <-
compose_noncentered( sigma_block , L_Rho_block , z_block ),
matrix[4,actor]:z_actor ~ normal( 0 , 1 ),
matrix[4,block_id]:z_block ~ normal( 0 , 1 ),
# fixed priors
g[tid] ~ normal(0,1),
vector[4]:sigma_actor ~ dexp(1),
cholesky_factor_corr[4]:L_Rho_actor ~ lkj_corr_cholesky( 2 ),
vector[4]:sigma_block ~ dexp(1),
cholesky_factor_corr[4]:L_Rho_block ~ lkj_corr_cholesky( 2 ),
# compute ordinary correlation matrixes from Cholesky factors
gq> matrix[4,4]:Rho_actor <<- multiply_lower_tri_self_transpose(L_Rho_actor),
gq> matrix[4,4]:Rho_block <<- multiply_lower_tri_self_transpose(L_Rho_block)
) , data=dat , chains=4 , cores=4 , log_lik=TRUE )
## R code 14.20
# extract n_eff values for each model
neff_nc <- precis(m14.3,3,pars=c("alpha","beta"))$n_eff
neff_c <- precis(m14.2,3,pars=c("alpha","beta"))$n_eff
plot( neff_c , neff_nc , xlab="centered (default)" ,
ylab="non-centered (cholesky)" , lwd=1.5 )
abline(a=0,b=1,lty=2)
## R code 14.21
precis( m14.3 , depth=2 , pars=c("sigma_actor","sigma_block") )
## R code 14.22
# compute mean for each actor in each treatment
pl <- by( d$pulled_left , list( d$actor , d$treatment ) , mean )
# generate posterior predictions using link
datp <- list(
actor=rep(1:7,each=4) ,
tid=rep(1:4,times=7) ,
block_id=rep(5,times=4*7) )
p_post <- link( m14.3 , data=datp )
p_mu <- apply( p_post , 2 , mean )
p_ci <- apply( p_post , 2 , PI )
# set up plot
plot( NULL , xlim=c(1,28) , ylim=c(0,1) , xlab="" ,
ylab="proportion left lever" , xaxt="n" , yaxt="n" )
axis( 2 , at=c(0,0.5,1) , labels=c(0,0.5,1) )
abline( h=0.5 , lty=2 )
for ( j in 1:7 ) abline( v=(j-1)*4+4.5 , lwd=0.5 )
for ( j in 1:7 ) text( (j-1)*4+2.5 , 1.1 , concat("actor ",j) , xpd=TRUE )
xo <- 0.1 # offset distance to stagger raw data and predictions
# raw data
for ( j in (1:7)[-2] ) {
lines( (j-1)*4+c(1,3)-xo , pl[j,c(1,3)] , lwd=2 , col=rangi2 )
lines( (j-1)*4+c(2,4)-xo , pl[j,c(2,4)] , lwd=2 , col=rangi2 )
}
points( 1:28-xo , t(pl) , pch=16 , col="white" , cex=1.7 )
points( 1:28-xo , t(pl) , pch=c(1,1,16,16) , col=rangi2 , lwd=2 )
yoff <- 0.175
text( 1-xo , pl[1,1]-yoff , "R/N" , pos=1 , cex=0.8 )
text( 2-xo , pl[1,2]+yoff , "L/N" , pos=3 , cex=0.8 )
text( 3-xo , pl[1,3]-yoff , "R/P" , pos=1 , cex=0.8 )
text( 4-xo , pl[1,4]+yoff , "L/P" , pos=3 , cex=0.8 )
# posterior predictions
for ( j in (1:7)[-2] ) {
lines( (j-1)*4+c(1,3)+xo , p_mu[(j-1)*4+c(1,3)] , lwd=2 )
lines( (j-1)*4+c(2,4)+xo , p_mu[(j-1)*4+c(2,4)] , lwd=2 )
}
for ( i in 1:28 ) lines( c(i,i)+xo , p_ci[,i] , lwd=1 )
points( 1:28+xo , p_mu , pch=16 , col="white" , cex=1.3 )
points( 1:28+xo , p_mu , pch=c(1,1,16,16) )
## R code 14.23
set.seed(73)
N <- 500
U_sim <- rnorm( N )
Q_sim <- sample( 1:4 , size=N , replace=TRUE )
E_sim <- rnorm( N , U_sim + Q_sim )
W_sim <- rnorm( N , U_sim + 0*E_sim )
dat_sim <- list(
W=standardize(W_sim) ,
E=standardize(E_sim) ,
Q=standardize(Q_sim) )
## R code 14.24
m14.4 <- ulam(
alist(
W ~ dnorm( mu , sigma ),
mu <- aW + bEW*E,
aW ~ dnorm( 0 , 0.2 ),
bEW ~ dnorm( 0 , 0.5 ),
sigma ~ dexp( 1 )
) , data=dat_sim , chains=4 , cores=4 )
precis( m14.4 )
## R code 14.25
m14.5 <- ulam(
alist(
c(W,E) ~ multi_normal( c(muW,muE) , Rho , Sigma ),
muW <- aW + bEW*E,
muE <- aE + bQE*Q,
c(aW,aE) ~ normal( 0 , 0.2 ),
c(bEW,bQE) ~ normal( 0 , 0.5 ),
Rho ~ lkj_corr( 2 ),
Sigma ~ exponential( 1 )
), data=dat_sim , chains=4 , cores=4 )
precis( m14.5 , depth=3 )
## R code 14.26
m14.4x <- ulam( m14.4 , data=dat_sim , chains=4 , cores=4 )
m14.5x <- ulam( m14.5 , data=dat_sim , chains=4 , cores=4 )
## R code 14.27
set.seed(73)
N <- 500
U_sim <- rnorm( N )
Q_sim <- sample( 1:4 , size=N , replace=TRUE )
E_sim <- rnorm( N , U_sim + Q_sim )
W_sim <- rnorm( N , -U_sim + 0.2*E_sim )
dat_sim <- list(
W=standardize(W_sim) ,
E=standardize(E_sim) ,
Q=standardize(Q_sim) )
## R code 14.28
library(dagitty)
dagIV <- dagitty( "dag{
E -> W
E <- U -> W
Q -> E
}")
instrumentalVariables( dagIV , exposure="E" , outcome="W" )
## R code 14.29
library(rethinking)
data(KosterLeckie)
## R code 14.30
kl_data <- list(
N = nrow(kl_dyads),
N_households = max(kl_dyads$hidB),
did = kl_dyads$did,
hidA = kl_dyads$hidA,
hidB = kl_dyads$hidB,
giftsAB = kl_dyads$giftsAB,
giftsBA = kl_dyads$giftsBA
)
m14.4 <- ulam(
alist(
giftsAB ~ poisson( lambdaAB ),
giftsBA ~ poisson( lambdaBA ),
log(lambdaAB) <- a + gr[hidA,1] + gr[hidB,2] + d[did,1] ,
log(lambdaBA) <- a + gr[hidB,1] + gr[hidA,2] + d[did,2] ,
a ~ normal(0,1),
## gr matrix of varying effects
vector[2]:gr[N_households] ~ multi_normal(0,Rho_gr,sigma_gr),
Rho_gr ~ lkj_corr(4),
sigma_gr ~ exponential(1),
## dyad effects
transpars> matrix[N,2]:d <-
compose_noncentered( rep_vector(sigma_d,2) , L_Rho_d , z ),
matrix[2,N]:z ~ normal( 0 , 1 ),
cholesky_factor_corr[2]:L_Rho_d ~ lkj_corr_cholesky( 8 ),
sigma_d ~ exponential(1),
## compute correlation matrix for dyads
gq> matrix[2,2]:Rho_d <<- Chol_to_Corr( L_Rho_d )
), data=kl_data , chains=4 , cores=4 , iter=2000 )
## R code 14.31
precis( m14.4 , depth=3 , pars=c("Rho_gr","sigma_gr") )
## R code 14.32
post <- extract.samples( m14.4 )
g <- sapply( 1:25 , function(i) post$a + post$gr[,i,1] )
r <- sapply( 1:25 , function(i) post$a + post$gr[,i,2] )
Eg_mu <- apply( exp(g) , 2 , mean )
Er_mu <- apply( exp(r) , 2 , mean )
## R code 14.33
plot( NULL , xlim=c(0,8.6) , ylim=c(0,8.6) , xlab="generalized giving" ,
ylab="generalized receiving" , lwd=1.5 )
abline(a=0,b=1,lty=2)
# ellipses
library(ellipse)
for ( i in 1:25 ) {
Sigma <- cov( cbind( g[,i] , r[,i] ) )
Mu <- c( mean(g[,i]) , mean(r[,i]) )
for ( l in c(0.5) ) {
el <- ellipse( Sigma , centre=Mu , level=l )
lines( exp(el) , col=col.alpha("black",0.5) )
}
}
# household means
points( Eg_mu , Er_mu , pch=21 , bg="white" , lwd=1.5 )
## R code 14.34
precis( m14.4 , depth=3 , pars=c("Rho_d","sigma_d") )
## R code 14.35
dy1 <- apply( post$d[,,1] , 2 , mean )
dy2 <- apply( post$d[,,2] , 2 , mean )
plot( dy1 , dy2 )
## R code 14.36
# load the distance matrix
library(rethinking)
data(islandsDistMatrix)
# display (measured in thousands of km)
Dmat <- islandsDistMatrix
colnames(Dmat) <- c("Ml","Ti","SC","Ya","Fi","Tr","Ch","Mn","To","Ha")
round(Dmat,1)
## R code 14.37
# linear
curve( exp(-1*x) , from=0 , to=4 , lty=2 ,
xlab="distance" , ylab="correlation" )
# squared
curve( exp(-1*x^2) , add=TRUE )
## R code 14.38
data(Kline2) # load the ordinary data, now with coordinates
d <- Kline2
d$society <- 1:10 # index observations
dat_list <- list(
T = d$total_tools,
P = d$population,
society = d$society,
Dmat=islandsDistMatrix )
m14.7 <- ulam(
alist(
T ~ dpois(lambda),
lambda <- (a*P^b/g)*exp(k[society]),
vector[10]:k ~ multi_normal( 0 , SIGMA ),
matrix[10,10]:SIGMA <- cov_GPL2( Dmat , etasq , rhosq , 0.01 ),
c(a,b,g) ~ dexp( 1 ),
etasq ~ dexp( 2 ),
rhosq ~ dexp( 0.5 )
), data=dat_list , chains=4 , cores=4 , iter=2000 )
## R code 14.39
precis( m14.7 , depth=3 )
## R code 14.40
post <- extract.samples(m14.7)
# plot the posterior median covariance function
plot( NULL , xlab="distance (thousand km)" , ylab="covariance" ,
xlim=c(0,10) , ylim=c(0,2) )
# compute posterior mean covariance
x_seq <- seq( from=0 , to=10 , length.out=100 )
pmcov <- sapply( x_seq , function(x) post$etasq*exp(-post$rhosq*x^2) )
pmcov_mu <- apply( pmcov , 2 , mean )
lines( x_seq , pmcov_mu , lwd=2 )
# plot 50 functions sampled from the posterior
for ( i in 1:50 )
curve( post$etasq[i]*exp(-post$rhosq[i]*x^2) , add=TRUE ,
col=col.alpha("black",0.3) )
## R code 14.41
# compute posterior median covariance among societies
K <- matrix(0,nrow=10,ncol=10)
for ( i in 1:10 )
for ( j in 1:10 )
K[i,j] <- median(post$etasq) *
exp( -median(post$rhosq) * islandsDistMatrix[i,j]^2 )
diag(K) <- median(post$etasq) + 0.01
## R code 14.42
# convert to correlation matrix
Rho <- round( cov2cor(K) , 2 )
# add row/col names for convenience
colnames(Rho) <- c("Ml","Ti","SC","Ya","Fi","Tr","Ch","Mn","To","Ha")
rownames(Rho) <- colnames(Rho)
Rho
## R code 14.43
# scale point size to logpop
psize <- d$logpop / max(d$logpop)
psize <- exp(psize*1.5)-2
# plot raw data and labels
plot( d$lon2 , d$lat , xlab="longitude" , ylab="latitude" ,
col=rangi2 , cex=psize , pch=16 , xlim=c(-50,30) )
labels <- as.character(d$culture)
text( d$lon2 , d$lat , labels=labels , cex=0.7 , pos=c(2,4,3,3,4,1,3,2,4,2) )
# overlay lines shaded by Rho
for( i in 1:10 )
for ( j in 1:10 )
if ( i < j )
lines( c( d$lon2[i],d$lon2[j] ) , c( d$lat[i],d$lat[j] ) ,
lwd=2 , col=col.alpha("black",Rho[i,j]^2) )
## R code 14.44
# compute posterior median relationship, ignoring distance
logpop.seq <- seq( from=6 , to=14 , length.out=30 )
# lambda = a*P^b/g with P = exp(log population); m14.7 has no bp parameter
lambda <- sapply( logpop.seq , function(lp) post$a*exp(lp)^post$b/post$g )
lambda.median <- apply( lambda , 2 , median )
lambda.PI80 <- apply( lambda , 2 , PI , prob=0.8 )
# plot raw data and labels
plot( d$logpop , d$total_tools , col=rangi2 , cex=psize , pch=16 ,
xlab="log population" , ylab="total tools" )
text( d$logpop , d$total_tools , labels=labels , cex=0.7 ,
pos=c(4,3,4,2,2,1,4,4,4,2) )
# display posterior predictions
lines( logpop.seq , lambda.median , lty=2 )
lines( logpop.seq , lambda.PI80[1,] , lty=2 )
lines( logpop.seq , lambda.PI80[2,] , lty=2 )
# overlay correlations
for( i in 1:10 )
for ( j in 1:10 )
if ( i < j )
lines( c( d$logpop[i],d$logpop[j] ) ,
c( d$total_tools[i],d$total_tools[j] ) ,
lwd=2 , col=col.alpha("black",Rho[i,j]^2) )
## R code 14.45
m14.7nc <- ulam(
alist(
T ~ dpois(lambda),
lambda <- (a*P^b/g)*exp(k[society]),
# non-centered Gaussian Process prior
transpars> vector[10]: k <<- L_SIGMA * z,
vector[10]: z ~ normal( 0 , 1 ),
transpars> matrix[10,10]: L_SIGMA <<- cholesky_decompose( SIGMA ),
transpars> matrix[10,10]: SIGMA <- cov_GPL2( Dmat , etasq , rhosq , 0.01 ),
c(a,b,g) ~ dexp( 1 ),
etasq ~ dexp( 2 ),
rhosq ~ dexp( 0.5 )
), data=dat_list , chains=4 , cores=4 , iter=2000 )
## R code 14.46
library(rethinking)
data(Primates301)
data(Primates301_nex)
# plot it using ape package - install.packages('ape') if needed
library(ape)
plot( ladderize(Primates301_nex) , type="fan" , font=1 , no.margin=TRUE ,
label.offset=1 , cex=0.5 )
## R code 14.47
d <- Primates301
d$name <- as.character(d$name)
dstan <- d[ complete.cases( d$group_size , d$body , d$brain ) , ]
spp_obs <- dstan$name
## R code 14.48
dat_list <- list(
N_spp = nrow(dstan),
M = standardize(log(dstan$body)),
B = standardize(log(dstan$brain)),
G = standardize(log(dstan$group_size)),
Imat = diag( nrow(dstan) )
)
m14.8 <- ulam(
alist(
B ~ multi_normal( mu , SIGMA ),
mu <- a + bM*M + bG*G,
matrix[N_spp,N_spp]: SIGMA <- Imat * sigma_sq,
a ~ normal( 0 , 1 ),
c(bM,bG) ~ normal( 0 , 0.5 ),
sigma_sq ~ exponential( 1 )
), data=dat_list , chains=4 , cores=4 )
precis( m14.8 )
## R code 14.49
library(ape)
tree_trimmed <- keep.tip( Primates301_nex, spp_obs )
Rbm <- corBrownian( phy=tree_trimmed )
V <- vcv(Rbm)
Dmat <- cophenetic( tree_trimmed )
plot( Dmat , V , xlab="phylogenetic distance" , ylab="covariance" )
## R code 14.50
# put species in right order
dat_list$V <- V[ spp_obs , spp_obs ]
# convert to correlation matrix
dat_list$R <- dat_list$V / max(V)
# Brownian motion model
m14.9 <- ulam(
alist(
B ~ multi_normal( mu , SIGMA ),
mu <- a + bM*M + bG*G,
matrix[N_spp,N_spp]: SIGMA <- R * sigma_sq,
a ~ normal( 0 , 1 ),
c(bM,bG) ~ normal( 0 , 0.5 ),
sigma_sq ~ exponential( 1 )
), data=dat_list , chains=4 , cores=4 )
precis( m14.9 )
## R code 14.51
# add scaled and reordered distance matrix
dat_list$Dmat <- Dmat[ spp_obs , spp_obs ] / max(Dmat)
m14.10 <- ulam(
alist(
B ~ multi_normal( mu , SIGMA ),
mu <- a + bM*M + bG*G,
matrix[N_spp,N_spp]: SIGMA <- cov_GPL1( Dmat , etasq , rhosq , 0.01 ),
a ~ normal(0,1),
c(bM,bG) ~ normal(0,0.5),
etasq ~ half_normal(1,0.25),
rhosq ~ half_normal(3,0.25)
), data=dat_list , chains=4 , cores=4 )
precis( m14.10 )
## R code 14.52
post <- extract.samples(m14.10)
plot( NULL , xlim=c(0,max(dat_list$Dmat)) , ylim=c(0,1.5) ,
xlab="phylogenetic distance" , ylab="covariance" )
# posterior
for ( i in 1:30 )
curve( post$etasq[i]*exp(-post$rhosq[i]*x) , add=TRUE , col=rangi2 )
# prior mean and 89% interval
eta <- abs(rnorm(1e3,1,0.25))
rho <- abs(rnorm(1e3,3,0.25))
d_seq <- seq(from=0,to=1,length.out=50)
K <- sapply( d_seq , function(x) eta*exp(-rho*x) )
lines( d_seq , colMeans(K) , lwd=2 )
shade( apply(K,2,PI) , d_seq )
text( 0.5 , 0.5 , "prior" )
text( 0.2 , 0.1 , "posterior" , col=rangi2 )
## R code 14.53
# covariance matrix built directly from standard deviations sa, sb and correlation rho
S <- matrix( c( sa^2 , sa*sb*rho , sa*sb*rho , sb^2 ) , nrow=2 )
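This snippet assumes sa, sb, and rho are already defined; plugging in the café values from above shows it reproduces the same covariance matrix as the diag() construction used earlier:
sa <- 1 ; sb <- 0.5 ; rho <- -0.7
S <- matrix( c( sa^2 , sa*sb*rho , sa*sb*rho , sb^2 ) , nrow=2 )
S   # identical to diag(c(sa,sb)) %*% matrix(c(1,rho,rho,1),2) %*% diag(c(sa,sb))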