-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPractice_4_5.R
127 lines (87 loc) · 2.39 KB
/
Practice_4_5.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Tuesday, 02/08/2022
# Practice_4
#---------------------------------------------------------------------------#
# Thursday, 02/10/2022
# Practice_5
# Attach the tidyverse before any other work so that glimpse(), the pipe and
# the dplyr/tidyr verbs used throughout the script are available from the start.
library(tidyverse)

# Location of the raw KS_BEA.csv input.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
bea_csv_path <- "C:/Users/bijangurung/Documents/GEOG728_1/KS_BEA.csv"

# Base read.csv() is kept on purpose: with its default check.names = TRUE the
# year columns come in as X2001 ... X2019, which the reshaping code further
# down selects by exactly those names.
df1 <- read.csv(bea_csv_path)

# First look at the raw table: leading rows, per-column summary, size, names.
head(df1)
summary(df1)
dim(df1)
colnames(df1)
glimpse(df1)

# View() opens a spreadsheet viewer; only do that in an interactive session so
# the script can also be run with Rscript / source() in batch mode.
if (interactive()) {
  View(df1)
}

# Inspect the Description column.
summary(df1$Description)
str(df1$Description)

# Missing values: per-column counts and the overall total. (Printing the full
# is.na(df1) logical matrix would flood the console without adding anything.)
colSums(is.na(df1))
sum(is.na(df1))
# Keep rows 1 through 3604 of the raw table and drop everything after them,
# then print the last rows of the result to check where the table now ends.
# NOTE(review): presumably the rows past 3604 are non-data footer lines in the
# CSV; confirm against the file, since the cut-off is hard-coded.
df1 <- slice(df1, seq_len(3604))
tail(df1)
# Reshape from wide to long: the X2001 ... X2019 columns are stacked into a
# `Year` / `Values` pair, one row per original row and year. The "X" prefix is
# stripped from the column names so that `Year` holds just the year digits.
df1_tidy <- pivot_longer(
  df1,
  cols = X2001:X2019,
  names_prefix = "X",
  names_to = "Year",
  values_to = "Values"
)

# Check the reshaped table.
head(df1_tidy)
glimpse(df1_tidy)
# Spread the Description values into columns: one row per GeoFIPS and Year,
# one column per distinct Description, filled from `Values`. Columns other than
# the id columns, Description and Values are dropped by id_cols.
df1_tidier <- df1_tidy %>%
  pivot_wider(
    id_cols = c("GeoFIPS", "Year"),
    names_from = Description,
    values_from = Values
  )

# Inspect the result: leading rows, structure, size and per-column summary.
head(df1_tidier)
glimpse(df1_tidier)
str(df1_tidier)
dim(df1_tidier)
summary(df1_tidier)

# Number of missing cells in each column after the reshape.
colSums(is.na(df1_tidier))

# The spreadsheet viewer is only useful (and only safe) interactively.
if (interactive()) {
  View(df1_tidier)
}
# Coerce Year and every column from there through
# "Private services-providing industries 3/" to numeric. GeoFIPS sits outside
# that range and is left untouched.
# across() is the current replacement for the superseded mutate_at()/vars().
# as.numeric() turns any entry it cannot parse into NA and emits a coercion
# warning, so warnings here point at non-numeric cells in the source data.
df1_clean <- df1_tidier %>%
  mutate(across(Year:"Private services-providing industries 3/", as.numeric))
glimpse(df1_clean)
# Give the most-used industry columns short names and drop the "20000" record.
#
# The short names must match the columns they label: `Ag` is the agriculture
# column and `Mining` is the mining column. (These two were previously crossed,
# which made every later use of `Ag` silently operate on mining data.)
df1_clean <- df1_clean %>%
  rename(
    Ag = ` Agriculture, forestry, fishing and hunting`,
    Mining = ` Mining, quarrying, and oil and gas extraction`,
    PI = ` Private industries`,
    total = `All industry total`
  ) %>%
  # GeoFIPS values carry literal double-quote characters, hence the escaped
  # quotes in the comparison string. A single filter on "20000" replaces the
  # earlier pair of filters, the first of which compared against "2000"
  # (apparently a typo for the same code).
  # NOTE(review): presumably 20000 is the statewide aggregate row; confirm.
  filter(GeoFIPS != "\"20000\"")

# Check the cleaned table: size, structure, and how many distinct areas and
# years remain.
dim(df1_clean)
glimpse(df1_clean)
length(unique(df1_clean$GeoFIPS))
length(unique(df1_clean$Year))
head(df1_clean)

# Open the spreadsheet viewer only when someone is there to look at it.
if (interactive()) {
  View(df1_clean)
}
# Missing-value counts for every non-grouping column, one row per Year.
df1_clean %>%
  group_by(Year) %>%
  summarise(across(everything(), ~ sum(is.na(.x))))

# The same counts, one row per GeoFIPS.
df1_clean %>%
  group_by(GeoFIPS) %>%
  summarise(across(everything(), ~ sum(is.na(.x))))

# Overall mean and standard deviation, ignoring NAs. Restricted to numeric
# columns: GeoFIPS is a character key, and taking mean()/sd() of it only
# produces warnings and an NA column.
df1_clean %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))
df1_clean %>%
  summarise(across(where(is.numeric), ~ sd(.x, na.rm = TRUE)))
# Tuesday, 02/15/2022 -------------------------------------

# Histogram of the `total` column over all rows.
ggplot(df1_clean, aes(x = total)) +
  geom_histogram()

# Histogram of the `Ag` column over all rows.
ggplot(df1_clean, aes(x = Ag)) +
  geom_histogram()

# Histogram of the `Ag` column for the 2012 rows only.
ggplot(filter(df1_clean, Year == 2012), aes(x = Ag)) +
  geom_histogram()

# One box of `total` per GeoFIPS; x labels are rotated and shrunk so the many
# GeoFIPS codes fit along the axis.
ggplot(df1_clean, aes(x = GeoFIPS, y = total)) +
  geom_boxplot(fill = "lightblue", alpha = 0.7) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, size = 5))

# `Ag` against Year as points, with a smoothed trend drawn over them.
ggplot(df1_clean, aes(x = Year, y = Ag)) +
  geom_point() +
  geom_smooth()