-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_exploration.sql
210 lines (153 loc) · 7.48 KB
/
data_exploration.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/*
--------------------------------------------------------------------------------------------------------
Data Exploration for Unicorn Companies Analytics Project
--------------------------------------------------------------------------------------------------------
Reesearch Questions
=======================================================================================================
- Which unicorn companies have had the biggest return on investment?
- How long does it usually take for a company to become a unicorn?
- Which industries have the most unicorns?
- Which countries have the most unicorns?
- Which investors have funded the most unicorns?
=======================================================================================================
*/
USE UnicornCompanies
SELECT *
FROM UnicornCompanies.dbo.unicorn_info
ORDER BY 1 ASC
SELECT *
FROM UnicornCompanies.dbo.unicorn_finance
ORDER BY 1 ASC
-- Total Unicorn Companies
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID)
SELECT COUNT(1) AS Unicorn
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
-- Total Countries
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID)
SELECT COUNT(DISTINCT Country) AS Country
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
--------------------------------------------------------------------------------------------------------
/*
- Which unicorn companies have had the biggest return on investment?
*/
SELECT TOP 10 Company, (CONVERT(BIGINT, Valuation)-CONVERT(BIGINT, Funding))/CONVERT(BIGINT, Funding) AS Roi
FROM UnicornCompanies.dbo.unicorn_finance
ORDER BY Roi DESC
-- > 1.Zapier 2.Dunamu 3.Workhuman 4.CFGI 5.Manner 6.DJI Innovations 7.GalaxySpace 8.Canva 9.II Makiage 10.Revolution Precrafted
--------------------------------------------------------------------------------------------------------
/*
- How long does it usually take for a company to become a unicorn?
*/
-- Find average years to become a unicorn
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID)
SELECT CAST(AVG(Year - YearFounded) AS INT) AS AverageYear
FROM UnicornCom
-- > On average it takes 6 years to become a unicorn company
-- Details on how long it takes for the companies to become a unicorn
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID)
SELECT TOP 10 (Year - YearFounded) AS UnicornYear, COUNT(1) AS Frequency
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
GROUP BY (Year - YearFounded)
ORDER BY COUNT(1) DESC
-- Mostly take from 4 to 7 years to become a unicorn
--------------------------------------------------------------------------------------------------------
/*
- Which industries have the most unicorns?
*/
-- Number of unicorn companies within each industry
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID)
SELECT Industry, COUNT(1) as Frequency
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
GROUP BY Industry
ORDER BY COUNT(1) DESC
-- > Fintech followed by Internet software and services and e-commerce.
-- Number of unicorn companies within each industry and their shares
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID
)
SELECT Industry, Count(1) AS Frequency, CAST(COUNT(1) * 100.0 / (SELECT COUNT(*) FROM UnicornCom) AS INT) AS 'Percentage'
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
GROUP BY Industry
ORDER BY Count(1) DESC
--------------------------------------------------------------------------------------------------------
/*
- Which countries have the most unicorns?
*/
-- Number of unicorn companies within each country
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID
)
SELECT Country, COUNT(1) AS Frequency
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
GROUP BY Country
ORDER BY Count(1) DESC
-- United States followed by China and India.
-- Number of unicorn companies within each country and their shares
WITH UnicornCom (ID, Company, Industry, City, Country, Continent, Valuation, Funding, YearFounded, Year, SelectInvestors) AS
(SELECT inf.ID, inf.Company, inf.Industry, inf.City, inf.Country, inf.Continent, fin.Valuation, fin.Funding, inf.YearFounded,
fin.Year, fin.SelectInvestors
FROM UnicornCompanies.dbo.unicorn_info AS inf
INNER JOIN UnicornCompanies.dbo.unicorn_finance AS fin
ON inf.ID = fin.ID
)
SELECT TOP 10 Country, COUNT(1) AS Frequency, CAST(COUNT(1) * 100.0 / (SELECT COUNT(*) FROM UnicornCom) AS INT) AS 'Percentage'
FROM UnicornCom
WHERE (Year - YearFounded) >= 0
GROUP BY Country
ORDER BY Count(1) DESC
--------------------------------------------------------------------------------------------------------
/*
- Which investors have funded the most unicorns?
*/
SELECT *
FROM UnicornCompanies.dbo.unicorn_finance
ORDER BY 1 ASC
-- Replace ', ' with ',' before doing the split
UPDATE UnicornCompanies.dbo.unicorn_finance
SET SelectInvestors = REPLACE(SelectInvestors, ', ', ',')
-- Get investor name list with their count
SELECT TOP 10 value AS Investors, COUNT(*) AS UnicornsInvested
FROM UnicornCompanies.dbo.unicorn_finance
CROSS APPLY STRING_SPLIT(SelectInvestors, ',')
GROUP BY value
ORDER BY COUNT(*) DESC
-- > Accel followed by Tiger Glabal Management and Andreessen Horowitz