-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata-wrangling.qmd
112 lines (96 loc) · 3.86 KB
/
data-wrangling.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
---
title: "Data Wrangling"
format: html
---
Run the code chunks in this file to wrangle the data for the app.
## Chunk 1
```{r}
# Pull the chosen V-Dem indicators for 1990 onward and tidy them for the app.
library(vdemdata)
library(dplyr)
library(readr)
library(countrycode)

vdem_df <- vdem |>
  filter(year >= 1990) |>
  select(
    country_name, country_id, year,
    v2x_polyarchy, v2x_libdem, v2x_partipdem, v2x_delibdem, v2x_egaldem,
    v2x_civlib, v2x_rule, v2xnp_client, v2xnp_regcorr, v2x_gender,
    e_gdppc,
    region = e_regionpol_6C
  ) |>
  mutate(
    # Rescale GDP per capita by 1000 (presumably from thousands of dollars --
    # confirm against the V-Dem codebook). mutate() evaluates its arguments in
    # order, so the display label below is built from the rescaled value.
    e_gdppc = e_gdppc * 1000,
    gdp_pc = scales::dollar_format()(e_gdppc),
    # Turn the numeric six-category region code into a readable label.
    region = case_match(
      region,
      1 ~ "Eastern Europe",
      2 ~ "Latin America",
      3 ~ "Middle East",
      4 ~ "Africa",
      5 ~ "The West",
      6 ~ "Asia"
    )
  ) |>
  # Translate the V-Dem country id into a World Bank country code and place it
  # right after country_id.
  mutate(
    iso3c = countrycode(
      sourcevar = country_id,
      origin = "vdem",
      destination = "wb"
    ),
    .after = country_id
  )

glimpse(vdem_df)

# Optional export of the country-year table (disabled):
# write_rds(vdem_df, "vdem_trends.rds")
```
## Chunk 2
```{r}
# Merge the V-Dem country-year data with country shapes from `rnaturalearth`
# and save the result for the app.
library(rnaturalearth)
library(sf)
library(dplyr)
library(tidyr)

# Load world boundaries, keeping only the ISO code and the geometry, and drop
# Antarctica.
# NOTE(review): some Natural Earth releases reportedly code a few countries
# (e.g. France, Norway) as "-99" in `iso_a3`; verify that those polygons still
# pick up their V-Dem rows.
world <- ne_countries(scale = "medium", returnclass = "sf") |>
  select(iso_a3, geometry) |>
  filter(iso_a3 != "ATA") |>
  rename(iso3c = iso_a3)

# Expand vdem_df to include all country-year combinations, so every polygon
# has one row per year even where V-Dem has no observation.
years <- seq(min(vdem_df$year), max(vdem_df$year))
countries <- unique(world$iso3c)
expanded_vdem_df <- expand.grid(
  iso3c = countries,
  year = years,
  # Keep the codes as character so the join keys have matching types instead
  # of relying on implicit factor-to-character coercion.
  stringsAsFactors = FALSE
) |>
  left_join(vdem_df, by = c("iso3c", "year"))

# Join with world boundaries and abbreviate long country names.
vdem_data <- world |>
  left_join(expanded_vdem_df, by = "iso3c") |>
  mutate(
    # These two are named from the ISO code rather than from an existing
    # country_name value.
    country_name = case_when(
      iso3c == "GRL" ~ "Greenland",
      iso3c == "SSD" ~ "South Sudan",
      .default = country_name
    ),
    # Shorten the remaining long names; anything not listed is kept as is.
    country_name = case_match(
      country_name,
      "North Macedonia" ~ "N. Macedonia",
      "Bosnia and Herzegovina" ~ "BiH",
      "Dominican Republic" ~ "Dominican Rep.",
      "Trinidad and Tobago" ~ "T&T",
      "United Arab Emirates" ~ "UAE",
      "Palestine/Gaza" ~ "Palestine",
      # The original recode only matched the misspelling "Equitorial Guinea";
      # accept the correct spelling too so the abbreviation is applied.
      c("Equatorial Guinea", "Equitorial Guinea") ~ "Eq. Guinea",
      "Republic of the Congo" ~ "Congo-Brazzaville",
      "Democratic Republic of the Congo" ~ "DRC",
      "Central African Republic" ~ "CAR",
      "Sao Tome and Principe" ~ "STP",
      "United States of America" ~ "United States",
      "Burma/Myanmar" ~ "Myanmar",
      "Papua New Guinea" ~ "PNG",
      .default = country_name
    )
  )

# Replace missing e_gdppc and gdp_pc values with the previous year's values.
# This relies on rows within each iso3c being in ascending year order, which
# holds here because expand.grid() lists years in increasing order for each
# code and the joins keep that order.
# Note: could try solving this issue by merging WB data instead.
vdem_data <- vdem_data |>
  group_by(iso3c) |>
  mutate(
    across(
      c(e_gdppc, gdp_pc),
      # Leave groups with no observed value untouched; otherwise carry the
      # last observation forward (leading NAs stay NA).
      \(x) if (any(!is.na(x))) zoo::na.locf(x, na.rm = FALSE) else x
    )
  ) |>
  ungroup()

glimpse(vdem_data)

# readr is only attached in chunk 1, so call write_rds() through its namespace.
readr::write_rds(vdem_data, "vdem_data.rds")
```