-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKeyword charts.R
114 lines (87 loc) · 4.87 KB
/
Keyword charts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(readr)
library(forecast)
library(magrittr)
# Moving average
# Centred moving average with equal weights over a window of `n` points
# (default 29). Returns the result of stats::filter(); positions where the
# window does not fit inside `x` come back as NA.
ma_month <- function(x, n = 29) {
  window_weights <- rep(1 / n, n)
  stats::filter(x, filter = window_weights, sides = 2)
}
# Centred moving average with equal weights over a window of `n` points
# (default 13). Returns the result of stats::filter(); positions where the
# window does not fit inside `x` come back as NA.
ma_2week <- function(x, n = 13) {
  window_weights <- rep(1 / n, n)
  stats::filter(x, filter = window_weights, sides = 2)
}
# Import (manual muahaha!)
# One CSV per search phrase. The files are read in a fixed order and bound to
# d1..d6 in that order, because the column renaming after the join is
# positional.
keyword_csvs <- c(
  "extinction_rebellion.js.csv",
  "climate_change.js.csv",
  "climate_crisis.js.csv",
  "citizens_assembly.js.csv",
  "global_warming.js.csv",
  "climate_emergency.js.csv"
)
keyword_tables <- lapply(keyword_csvs, read_csv)
d1 <- keyword_tables[[1]]
d2 <- keyword_tables[[2]]
d3 <- keyword_tables[[3]]
d4 <- keyword_tables[[4]]
d5 <- keyword_tables[[5]]
d6 <- keyword_tables[[6]]
# Merge the six keyword tables on their shared "Date" column. A full join
# keeps every date that occurs in at least one table. The fold joins the
# tables left to right: d1 with d2, the result with d3, and so on.
hansard <- Reduce(
  function(joined, next_table) full_join(joined, next_table, by = "Date"),
  list(d1, d2, d3, d4, d5, d6)
)
# ORDER, ORDER! Names are assigned by position, so they must follow the join
# order above (d1 ... d6).
# NOTE(review): assumes each table contributes exactly one non-Date column,
# giving seven columns in total -- confirm against the CSVs.
names(hansard) <- c(
  "Date",
  "extinction_rebellion", "climate_change", "climate_crisis",
  "citizens_assembly", "global_warming", "climate_emergency"
)
# Put the rows in ascending Date order
hansard <- hansard[order(hansard[["Date"]]), ]
# Fill in missing days just to be safe
# Coerce Date to calendar dates, then add a row for every day between the
# first and the last date that is not already present.
hansard <- mutate(hansard, Date = as.Date(Date))
hansard <- complete(
  hansard,
  Date = seq.Date(min(Date), max(Date), by = "day")
)
# From here on Date is stored as a date-time
hansard[["Date"]] <- as.POSIXct(hansard[["Date"]])
# Every remaining NA (including the rows just added) becomes zero
hansard[is.na(hansard)] <- 0
# Inspect: tabulate the values of every column except the first (Date)
sapply(hansard[-1], table)
# Add keywords together: two combined series, each the row-wise sum of two
# related keyword columns
hansard <- mutate(
  hansard,
  climate_alarm = climate_crisis + climate_emergency,
  global_heating = climate_change + global_warming
)
# Smooth the lines
# Run the default (29-point) moving average over every column except the
# first (Date) and collect the results in a plain data frame.
hansard_smooth <- as.data.frame(lapply(hansard[-1], ma_month))
# Re-attach the date axis; it ends up as the last column
hansard_smooth[["Date"]] <- as.POSIXct(hansard[["Date"]])
# Set start date
# Keep only rows from 2017-01-01 onwards, in both the smoothed and raw tables.
# Date was built from calendar dates via as.POSIXct(), which places each day
# at midnight UTC, so the cutoff is pinned to UTC as well. A cutoff in the
# session's local time zone would drop 2017-01-01 itself wherever local time
# is behind UTC.
# dplyr::filter is spelled out because stats::filter (used by the moving
# averages) shares the name.
start_date <- as.POSIXct("2017-01-01", tz = "UTC")
hansard_smooth_2017 <- dplyr::filter(hansard_smooth, Date >= start_date)
hansard_2017 <- dplyr::filter(hansard, Date >= start_date)
# Simple MA plot
# Moving-average series from the 2017-onwards table. Each series is passed
# through scale() so the three lines share one y axis. Two vertical lines mark
# 2018-11-29 (solid) and 2019-04-15 (long-dashed); text annotations label the
# series and the two dates.
ggplot(hansard_smooth_2017) +
  geom_line(
    aes(x = Date, y = scale(extinction_rebellion)),
    color = "limegreen", size = 1.2, alpha = 0.8
  ) +
  geom_line(
    aes(x = Date, y = scale(climate_alarm)),
    color = "red4", alpha = 0.6
  ) +
  geom_line(
    aes(x = Date, y = scale(global_heating)),
    color = "red", alpha = 0.6
  ) +
  geom_vline(xintercept = as.POSIXct("2018-11-29"), color = "hotpink") +
  geom_vline(
    xintercept = as.POSIXct("2019-04-15"),
    color = "hotpink", linetype = "longdash"
  ) +
  labs(y = "Standardised count of keywords in Hansard") +
  theme_light() +
  # Hide the y tick labels
  theme(axis.text.y = element_blank()) +
  annotate(
    "text",
    x = as.POSIXct("2017-08-29"), y = 0.4,
    label = '"Extinction Rebellion"', color = "limegreen"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2019-01-29"), y = -0.7,
    label = '"Climate crisis/emergency"', color = "red4"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-06-29"), y = 1.1,
    label = '"Climate change & global warming"', color = "red"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-08-29"), y = 5,
    label = "2018-11-29", color = "hotpink"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2019-07-20"), y = 5,
    label = "2019-04-15", color = "hotpink"
  )
# Write the plot built above to a 6 x 4 PDF
ggsave(
  filename = "hansard-extinction-rebellion.pdf",
  device = "pdf", width = 6, height = 4
)
# With LOESS smoothing function
# Same layout as the moving-average plot, but drawn from the unsmoothed
# 2017-onwards table: each line is a LOESS fit computed by the "smooth" stat.
# `span` is the LOESS span shared by all three lines.
span <- 0.2
ggplot(hansard_2017) +
  geom_line(
    aes(x = Date, y = scale(extinction_rebellion)),
    stat = "smooth", method = "loess", span = span,
    color = "limegreen", size = 1.2, alpha = 0.7
  ) +
  geom_line(
    aes(x = Date, y = scale(climate_alarm)),
    stat = "smooth", method = "loess", span = span,
    color = "red4", size = 1.2, alpha = 0.7
  ) +
  geom_line(
    aes(x = Date, y = scale(global_heating)),
    stat = "smooth", method = "loess", span = span,
    color = "red", size = 1.2, alpha = 0.7
  ) +
  geom_vline(xintercept = as.POSIXct("2018-11-29"), color = "hotpink") +
  geom_vline(
    xintercept = as.POSIXct("2019-04-15"),
    color = "hotpink", linetype = "longdash"
  ) +
  labs(y = "Standardised count of keywords in Hansard") +
  theme_light() +
  # Hide the y tick labels
  theme(axis.text.y = element_blank()) +
  annotate(
    "text",
    x = as.POSIXct("2017-08-29"), y = 0.1,
    label = '"Extinction Rebellion"', color = "limegreen"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-02-28"), y = -0.4,
    label = '"Climate crisis/emergency"', color = "red4"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-06-29"), y = 0.3,
    label = '"Climate change & global warming"', color = "red"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2018-08-29"), y = 1.6,
    label = "2018-11-29", color = "hotpink"
  ) +
  annotate(
    "text",
    x = as.POSIXct("2019-07-15"), y = 1.6,
    label = "2019-04-15", color = "hotpink"
  )
# Write the plot built above to a 6 x 4 PDF
ggsave(
  filename = "hansard-extinction-rebellion-fancy.pdf",
  device = "pdf", width = 6, height = 4
)
#------ Other Stuff -----
# Simple plot
# Unsmoothed, unscaled series for two keywords over the full date range, with
# a vertical marker at 2018-11-29.
# The x aesthetic must be `Date` (capital D), the column name set when the
# table was assembled. A lower-case `date` is not a column, so ggplot would
# pick up a function of that name from the environment and fail.
ggplot(data = hansard) +
  geom_line(aes(Date, extinction_rebellion), color = "red") +
  geom_line(aes(Date, climate_change), color = "darkgreen") +
  geom_vline(
    xintercept = as.POSIXct("2018-11-29"),
    size = 1.5, color = "limegreen"
  ) +
  theme_economist()
# Pairwise correlations between the keyword series, raw and smoothed.
# The Date column is dropped by name rather than by position: in
# hansard_smooth it is the last column, after the two combined series, so a
# fixed index such as -7 removes a keyword column instead and leaves the
# non-numeric Date in, which cor() rejects.
raw_keywords <- hansard[, names(hansard) != "Date"]
heatmap(cor(raw_keywords, use = "pairwise.complete.obs"))
smooth_keywords <- hansard_smooth[
  , names(hansard_smooth) != "Date",
  drop = FALSE
]
# The moving average leaves NAs at both ends; pairwise deletion copes with
# them.
smooth_cor <- cor(smooth_keywords, use = "pairwise.complete.obs")
heatmap(smooth_cor)
smooth_cor