-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02-Data-Pre-processing.Rmd
220 lines (181 loc) · 13.1 KB
/
02-Data-Pre-processing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# Data Preparation
Next, we prepare the data for visualisation and analysis. This includes steps such as changing the type of the variables.
In the raw data, each of the 4 types of emotions (Low Arousal Negative Affect, Low Arousal Positive Affect, High Arousal Negative Affect and High Arousal Positive Affect) is represented by multiple emotion questions in the ESM questionnaire.
For each type of emotion, we first save each emotion question into a new factor variable. Next, we recode each variable so that `yes` corresponds to `1`, `no` corresponds to `0`, and missing values correspond to `NA`. Finally, we convert the recoded variable back to numeric and save it as a new variable.
## Low Arousal Negative Affect
```{r Low Arousal Negative Affect}
# Low Arousal Negative Affect: items 1-7 of question 23.
#
# Step 1: store every raw item as a factor and relabel its levels in one go:
#   raw "0" -> NA, raw "1" (no) -> "0", raw "2" (yes) -> "1".
# All factor columns are appended before any numeric column, so the numeric
# columns created in step 2 sit next to each other in the data frame.
df[paste0("FactorQuestion.23.", 1:7)] <- lapply(
  df[paste0("Question.23.", 1:7)],
  function(item) {
    plyr::revalue(as.factor(as.character(item)), c('0' = NA, '1' = '0', '2' = '1'))
  }
)

# Step 2: turn the relabelled factors into numeric 0/1 values (via character,
# so the labels and not the internal level codes are used) so that a mean can
# be computed later.
df[paste0("NumericQuestion.23.", 1:7)] <- lapply(
  df[paste0("FactorQuestion.23.", 1:7)],
  function(item) as.numeric(as.character(item))
)
```
## Low Arousal Positive Affect
```{r Low Arousal Positive Affect}
# Low Arousal Positive Affect: items 1-7 of question 24.
#
# Step 1: store every raw item as a factor and relabel its levels in one go:
#   raw "0" -> NA, raw "1" (no) -> "0", raw "2" (yes) -> "1".
# All factor columns are appended before any numeric column, so the numeric
# columns created in step 2 sit next to each other in the data frame.
df[paste0("FactorQuestion.24.", 1:7)] <- lapply(
  df[paste0("Question.24.", 1:7)],
  function(item) {
    plyr::revalue(as.factor(as.character(item)), c('0' = NA, '1' = '0', '2' = '1'))
  }
)

# Step 2: turn the relabelled factors into numeric 0/1 values (via character,
# so the labels and not the internal level codes are used) so that a mean can
# be computed later.
df[paste0("NumericQuestion.24.", 1:7)] <- lapply(
  df[paste0("FactorQuestion.24.", 1:7)],
  function(item) as.numeric(as.character(item))
)
```
## High Arousal Negative Affect
```{r High Arousal Negative Affect}
# High Arousal Negative Affect: items 1-9 of question 25.
#
# Step 1: store every raw item as a factor and relabel its levels in one go:
#   raw "0" -> NA, raw "1" (no) -> "0", raw "2" (yes) -> "1".
# All factor columns are appended before any numeric column, so the numeric
# columns created in step 2 sit next to each other in the data frame.
df[paste0("FactorQuestion.25.", 1:9)] <- lapply(
  df[paste0("Question.25.", 1:9)],
  function(item) {
    plyr::revalue(as.factor(as.character(item)), c('0' = NA, '1' = '0', '2' = '1'))
  }
)

# Step 2: turn the relabelled factors into numeric 0/1 values (via character,
# so the labels and not the internal level codes are used) so that a mean can
# be computed later.
df[paste0("NumericQuestion.25.", 1:9)] <- lapply(
  df[paste0("FactorQuestion.25.", 1:9)],
  function(item) as.numeric(as.character(item))
)
```
## High Arousal Positive Affect
```{r High Arousal Positive Affect}
# High Arousal Positive Affect: items 1-9 of question 26.
#
# Step 1: store every raw item as a factor and relabel its levels in one go:
#   raw "0" -> NA, raw "1" (no) -> "0", raw "2" (yes) -> "1".
# All factor columns are appended before any numeric column, so the numeric
# columns created in step 2 sit next to each other in the data frame.
df[paste0("FactorQuestion.26.", 1:9)] <- lapply(
  df[paste0("Question.26.", 1:9)],
  function(item) {
    plyr::revalue(as.factor(as.character(item)), c('0' = NA, '1' = '0', '2' = '1'))
  }
)

# Step 2: turn the relabelled factors into numeric 0/1 values (via character,
# so the labels and not the internal level codes are used) so that a mean can
# be computed later.
df[paste0("NumericQuestion.26.", 1:9)] <- lapply(
  df[paste0("FactorQuestion.26.", 1:9)],
  function(item) as.numeric(as.character(item))
)
```
## Create Means Per Observation
After having pre-processed the emotion variables, we need to calculate a mean for each type of emotion for each unique observation. Because there is one row per observation, we can simply calculate the row means.
```{r RowMeans, echo=TRUE}
# Create row means per type of emotion (i.e. by participant and by timepoint,
# because every row is one observation).
#
# The item columns of each scale are selected by NAME instead of by a
# start:end range of column positions. A positional range silently averages
# whatever columns happen to lie between the first and the last item, so it
# breaks as soon as the column order changes; name-based selection does not.
#
# With na.rm = TRUE, missing items are skipped; a row in which every item of a
# scale is NA gets NaN as its mean.

# Column positions of the first and last item of each scale. They are no
# longer needed for the means below, but are kept in case code outside this
# chunk still refers to them.
LowArousalNegativeAffectStart <- match("NumericQuestion.23.1", names(df))
LowArousalNegativeAffectEnd <- match("NumericQuestion.23.7", names(df))
LowArousalPositiveAffectStart <- match("NumericQuestion.24.1", names(df))
LowArousalPositiveAffectEnd <- match("NumericQuestion.24.7", names(df))
HighArousalNegativeAffectStart <- match("NumericQuestion.25.1", names(df))
HighArousalNegativeAffectEnd <- match("NumericQuestion.25.9", names(df))
HighArousalPositiveAffectStart <- match("NumericQuestion.26.1", names(df))
HighArousalPositiveAffectEnd <- match("NumericQuestion.26.9", names(df))

# Assign a new row-mean variable per type of emotion.
# Questions 23 and 24 have 7 items each, questions 25 and 26 have 9 items each.
df$LowArousalNegativeAffectMean <- rowMeans(df[, paste0("NumericQuestion.23.", 1:7)], na.rm = TRUE)
df$LowArousalPositiveAffectMean <- rowMeans(df[, paste0("NumericQuestion.24.", 1:7)], na.rm = TRUE)
df$HighArousalNegativeAffectMean <- rowMeans(df[, paste0("NumericQuestion.25.", 1:9)], na.rm = TRUE)
df$HighArousalPositiveAffectMean <- rowMeans(df[, paste0("NumericQuestion.26.", 1:9)], na.rm = TRUE)
```
## Recode Remaining Variables
We also need to prepare our remaining variables. The first variable we need to create is a variable that codes whether participants were alone at the time of the questionnaire.
```{r Alone, echo=TRUE}
# Coding of solitude, taken from the pre-registration:
#   Question.3 == 2                      -> 1 (alone)
#   Question.3 == 1 and Question.4 == 1  -> 1 (alone)
#   anything else                        -> 0 (not alone)
# If a missing answer leaves the condition undetermined, Alone is NA.
# The 1/0 result is stored as a factor (via character, so its levels are the
# labels "0" and "1").
df <- df %>%
  mutate(
    Alone = as.factor(as.character(
      ifelse(Question.3 == 2 | (Question.3 == 1 & Question.4 == 1), 1, 0)
    ))
  )
```
Afterwards, we calculate the day-level moderators. For that, we first need to create a variable that identifies the day. Moreover, because there are multiple observations per day for each subject, we need to create a combined `SubjDay` variable (following <https://psyarxiv.com/xf2pw/>).
```{r SubjDay, echo=TRUE}
# Derive a variable that indicates the day: split the start timestamp at the
# space into a date part ("Day") and a time part ("Time"). The original
# timestamp column is kept (remove = FALSE).
# Note: the dates are written the American way.
# NOTE(review): the "ï.." prefix of the column name looks like a byte-order-mark
# artefact from importing the CSV -- confirm against the import step.
df <- tidyr::separate(
  df,
  col = ï..Start.Date,
  into = c("Day", "Time"),
  sep = " ",
  remove = FALSE
)

# Combined participant-by-day identifier, following https://psyarxiv.com/xf2pw/
# (paste() joins the two parts with a single space by default).
df$SubjDay <- paste(df$Participant.ID, df$Day)
```
Next, we calculate day-level exposure to nature, day-level intentionality for solitude, and day-level structure for solitude. Higher scores correspond to higher exposure / intentionality / structure.
```{r DayLevelVariables, echo=TRUE}
# Day-level moderators. Each one is a mean per participant and day that is
# written back to every row of that participant-day via transform().
#
# ddply is called as plyr::ddply, like plyr::revalue elsewhere in this file,
# so this chunk does not depend on plyr being attached.

# Day-level nature: mean of Question.2 (higher score = more nature).
# NOTE(review): no na.rm here, so one missing Question.2 answer makes the mean
# of that whole participant-day NA -- confirm this is intended.
df <- plyr::ddply(df, c('Day', 'Participant.ID'), transform, DayLevelNature = mean(Question.2))

# Day-level intentionality for solitude.
# Recode Question.5: raw "3" -> NA, raw "1" -> "0", raw "2" -> "1".
df$FactorQuestion.5 <- as.factor(as.character(df$Question.5))
df$FactorQuestion.5 <- plyr::revalue(df$FactorQuestion.5, c('3' = NA, '1' = '0', '2' = '1'))
df$NumericQuestion.5 <- as.numeric(as.character(df$FactorQuestion.5))
# NOTE(review): the original author saw a warning at this point and judged it
# harmless after manual re-checking.
# Keep the answer only for observations where the participant was alone
# (Alone == 1); higher score = higher intentionality.
df <- df %>% mutate(NumericQuestion.5.Alone = ifelse(Alone == 1, NumericQuestion.5, NA))
df <- plyr::ddply(df, c('Day', 'Participant.ID'), transform, DayLevelIntentionalitySolitude = mean(NumericQuestion.5.Alone, na.rm = TRUE))

# Day-level structure for solitude.
# Recode Question.6 the same way: raw "3" -> NA, raw "1" -> "0", raw "2" -> "1".
df$FactorQuestion.6 <- as.factor(as.character(df$Question.6))
df$FactorQuestion.6 <- plyr::revalue(df$FactorQuestion.6, c('3' = NA, '1' = '0', '2' = '1'))
df$NumericQuestion.6 <- as.numeric(as.character(df$FactorQuestion.6))
# Keep the answer only for observations where the participant was alone;
# higher score = higher goal / structure.
df <- df %>% mutate(NumericQuestion.6.Alone = ifelse(Alone == 1, NumericQuestion.6, NA))
df <- plyr::ddply(df, c('Day', 'Participant.ID'), transform, DayLevelStructureSolitude = mean(NumericQuestion.6.Alone, na.rm = TRUE))
```