-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpath_viz_12_sankey.R
149 lines (123 loc) · 5.61 KB
/
path_viz_12_sankey.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Project setup script. It presumably attaches the packages used below
# (dplyr, tidyr, stringr), since this file has no library() calls of its
# own -- TODO confirm against source.R.
source('source.R')
adult_panel <- read.csv('data/Output/adult_panel.csv')

#### Sankey Plot: All 5 levels (Complete Cases) ####
# Both transitions are tallied over the same respondents: rows where
# R02_CONTINUING_ADULT_LD and R03_ADULTTYPE both equal 1 and the smoking
# status is non-missing in all three waves.
panel_complete <- adult_panel %>%
  filter(
    as.numeric(R02_CONTINUING_ADULT_LD) == 1,
    as.numeric(R03_ADULTTYPE) == 1
  ) %>%
  drop_na(
    smoking_status_full_w1,
    smoking_status_full_w2,
    smoking_status_full_w3
  )

# Wave 1 -> wave 2 flows. Node labels get a wave prefix so that the same
# status in different waves is a distinct node.
step_1 <- panel_complete %>%
  count(before = smoking_status_full_w1, after = smoking_status_full_w2) %>%
  rename(flow = n) %>%
  mutate(
    before = paste0('w1_', before),
    after = paste0('w2_', after),
    step_from = 'wave 1',
    step_to = 'wave 2'
  )

# Wave 2 -> wave 3 flows, labelled the same way.
step_2 <- panel_complete %>%
  count(before = smoking_status_full_w2, after = smoking_status_full_w3) %>%
  rename(flow = n) %>%
  mutate(
    before = paste0('w2_', before),
    after = paste0('w3_', after),
    step_from = 'wave 2',
    step_to = 'wave 3'
  )

# One long table of edges (before, after, flow, step_from, step_to).
sankey_df <- rbind(step_1, step_2)
write.csv(sankey_df, 'Output/sankey_df.csv', row.names = FALSE)
#### Sankey Plot: All 5 levels (Include Subject and Item Non-Response) ####
# Every row of adult_panel is counted; a missing smoking status becomes its
# own 'missing' node instead of being dropped.
#
# The NA -> 'missing' relabelling is applied to the raw status values with
# is.na(), before the wave prefix is pasted on. Substituting the text 'NA'
# in the finished labels would also rewrite any status label that merely
# contains the letters 'NA'.

# Wave 1 -> wave 2 flows.
step_1 <- adult_panel %>%
  count(smoking_status_full_w1, smoking_status_full_w2) %>%
  rename(
    before = smoking_status_full_w1,
    after = smoking_status_full_w2,
    flow = n
  ) %>%
  mutate(
    before = ifelse(is.na(before), 'missing', as.character(before)),
    after = ifelse(is.na(after), 'missing', as.character(after)),
    before = paste0('w1_', before),
    after = paste0('w2_', after),
    step_from = 'wave 1',
    step_to = 'wave 2'
  )

# Wave 2 -> wave 3 flows.
step_2 <- adult_panel %>%
  count(smoking_status_full_w2, smoking_status_full_w3) %>%
  rename(
    before = smoking_status_full_w2,
    after = smoking_status_full_w3,
    flow = n
  ) %>%
  mutate(
    before = ifelse(is.na(before), 'missing', as.character(before)),
    after = ifelse(is.na(after), 'missing', as.character(after)),
    before = paste0('w2_', before),
    after = paste0('w3_', after),
    step_from = 'wave 2',
    step_to = 'wave 3'
  )

# One long table of edges (before, after, flow, step_from, step_to).
sankey_df <- rbind(step_1, step_2)
write.csv(sankey_df, 'Output/sankey_non_response_df.csv', row.names = FALSE)
#### W1 Cur. Est. Smokers (Complete Cases) ####
# Cohort: rows where R02_CONTINUING_ADULT_LD and R03_ADULTTYPE both equal 1,
# current_est_smoker_w1 equals 1, and the smoking status is non-missing in
# all three waves.
#
# Both transitions must be tallied over this one cohort. If the wave 1 -> 2
# step only required wave 2 to be observed while the wave 2 -> 3 step also
# required wave 3, the flow into each wave-2 node would exceed the flow out
# of it and the diagram would not balance.
cur_est_complete <- adult_panel %>%
  filter(
    as.numeric(R02_CONTINUING_ADULT_LD) == 1,
    as.numeric(R03_ADULTTYPE) == 1,
    current_est_smoker_w1 == 1
  ) %>%
  drop_na(
    smoking_status_full_w1,
    smoking_status_full_w2,
    smoking_status_full_w3
  )

# Wave 1 -> wave 2 flows; labels get a wave prefix to keep nodes distinct.
step_1 <- cur_est_complete %>%
  count(smoking_status_full_w1, smoking_status_full_w2) %>%
  rename(
    before = smoking_status_full_w1,
    after = smoking_status_full_w2,
    flow = n
  ) %>%
  mutate(
    step_from = 'wave 1',
    step_to = 'wave 2',
    before = paste('w1', before, sep = '_'),
    after = paste('w2', after, sep = '_')
  )

# Wave 2 -> wave 3 flows.
step_2 <- cur_est_complete %>%
  count(smoking_status_full_w2, smoking_status_full_w3) %>%
  rename(
    before = smoking_status_full_w2,
    after = smoking_status_full_w3,
    flow = n
  ) %>%
  mutate(
    step_from = 'wave 2',
    step_to = 'wave 3',
    before = paste('w2', before, sep = '_'),
    after = paste('w3', after, sep = '_')
  )

cur_est_sankey_df <- rbind(step_1, step_2)

# View() needs an interactive data viewer; skip it when the script is run
# non-interactively (e.g. via Rscript) so the CSV is still written.
if (interactive()) {
  View(cur_est_sankey_df)
}
write.csv(cur_est_sankey_df, 'Output/cur_est_sankey_df.csv', row.names = FALSE)
#### W1 Cur. Est. Smokers (Include Missing Data) ####
# Cohort: every row with current_est_smoker_w1 equal to 1. Rows with a
# missing smoking status are kept and shown as a 'missing' node.
cur_est_all <- adult_panel %>%
  dplyr::filter(current_est_smoker_w1 == 1)

# The NA -> 'missing' relabelling is applied to the raw status values with
# is.na(), before the wave prefix is pasted on. Substituting the text 'NA'
# in the finished labels would also rewrite any status label that merely
# contains the letters 'NA'.

# Wave 1 -> wave 2 flows.
step_1 <- cur_est_all %>%
  count(smoking_status_full_w1, smoking_status_full_w2) %>%
  rename(
    before = smoking_status_full_w1,
    after = smoking_status_full_w2,
    flow = n
  ) %>%
  mutate(
    before = ifelse(is.na(before), 'missing', as.character(before)),
    after = ifelse(is.na(after), 'missing', as.character(after)),
    before = paste0('w1_', before),
    after = paste0('w2_', after),
    step_from = 'wave 1',
    step_to = 'wave 2'
  )

# Wave 2 -> wave 3 flows.
step_2 <- cur_est_all %>%
  count(smoking_status_full_w2, smoking_status_full_w3) %>%
  rename(
    before = smoking_status_full_w2,
    after = smoking_status_full_w3,
    flow = n
  ) %>%
  mutate(
    before = ifelse(is.na(before), 'missing', as.character(before)),
    after = ifelse(is.na(after), 'missing', as.character(after)),
    before = paste0('w2_', before),
    after = paste0('w3_', after),
    step_from = 'wave 2',
    step_to = 'wave 3'
  )

# One long table of edges (before, after, flow, step_from, step_to).
sankey_df <- rbind(step_1, step_2)

# View() needs an interactive data viewer; skip it when the script is run
# non-interactively (e.g. via Rscript) so the CSV is still written.
if (interactive()) {
  View(sankey_df)
}
write.csv(sankey_df, 'Output/sankey_cur_est_smokers_non_response_df.csv', row.names = FALSE)