Skip to content

Commit 947cdfd

Browse files
committed
[ADB2022] add lab 8,9
1 parent e1f75ed commit 947cdfd

9 files changed

+709
-0
lines changed

Advanced databases 2022/Lab 8-9 (Analysis of input data and constraints of columns)/lab8-9.md

+413
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"scrolled": true
8+
},
9+
"outputs": [],
10+
"source": [
11+
"# import data\n",
12+
"\n",
13+
"import pandas as pd\n",
14+
"\n",
15+
"data = pd.read_excel('input_data.xlsx')\n",
16+
"\n",
17+
"data"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {},
24+
"outputs": [],
25+
"source": [
26+
"# read headers\n",
27+
"\n",
28+
"print(data.columns)"
29+
]
30+
},
31+
{
32+
"cell_type": "code",
33+
"execution_count": null,
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"data.area.describe()"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": null,
43+
"metadata": {
44+
"scrolled": true
45+
},
46+
"outputs": [],
47+
"source": [
48+
"data.info()"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {
55+
"scrolled": true
56+
},
57+
"outputs": [],
58+
"source": [
59+
"# find all unique cities\n",
60+
"\n",
61+
"all_city = data['city'].unique()\n",
62+
"print(\"City array: {0}\".format(all_city))\n",
63+
"\n",
64+
"# find all unique countries\n",
65+
"\n",
66+
"all_country = data['country'].unique()\n",
67+
"print(\"Country array: {0}\".format(all_country))"
68+
]
69+
},
70+
{
71+
"cell_type": "code",
72+
"execution_count": null,
73+
"metadata": {},
74+
"outputs": [],
75+
"source": [
76+
"# map country name variants to a canonical three-letter code\n",
77+
"dicionary_corect = {'US':'USA', 'USA':'USA', ' United States of America':'USA', 'America':'USA', 'Poland':'POL', 'PL':'POL', 'Polska':'POL' }\n",
78+
"mapping_country = data['country'].map(dicionary_corect)\n",
79+
"\n",
80+
"data['country'] = mapping_country\n",
81+
"\n",
82+
"data"
83+
]
84+
},
85+
{
86+
"cell_type": "code",
87+
"execution_count": null,
88+
"metadata": {},
89+
"outputs": [],
90+
"source": [
91+
"# check area and population value\n",
92+
"\n",
93+
"for city in all_city:\n",
94+
" # get the unique non-null area and population values for the city\n",
95+
" area = data[(data['city']==city) & (~data['area'].isna())]['area'].unique()\n",
96+
" population = data[(data['city']==city) & (~data['population'].isna())]['population'].unique()\n",
97+
" if len(area) == 1:\n",
98+
" data.loc[(data['city']==city) & (data['area'].isna()), 'area'] = area\n",
99+
" else:\n",
100+
" print('Area data mismatch on the context of {0}'.format(city))\n",
101+
" \n",
102+
" if len(population) == 1:\n",
103+
" data.loc[(data['city']==city) & (data['population'].isna()), 'population'] = population\n",
104+
" else:\n",
105+
" print('Population data mismatch on the context of {0}'.format(city))\n",
106+
"\n",
107+
"data"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"# get country\n",
117+
"\n",
118+
"country_list = pd.DataFrame(data['country'].unique(), columns=['country'])\n",
119+
"\n",
120+
"country_list.index.name = 'id'\n",
121+
"\n",
122+
"country_list"
123+
]
124+
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": null,
128+
"metadata": {},
129+
"outputs": [],
130+
"source": [
131+
"# get cities and connect them with their country\n",
132+
"\n",
133+
"city_list = data[['city','country']].drop_duplicates().reset_index().drop(columns = ['index']);\n",
134+
"city_list.index.name = 'id'\n",
135+
"\n",
136+
"city_list = city_list.rename(columns = {'country':'country_id'})\n",
137+
"\n",
138+
"city_list"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": null,
144+
"metadata": {},
145+
"outputs": [],
146+
"source": [
147+
"city_list['country_id'] = city_list['country_id'].map(lambda x: country_list[country_list['country'] == x].index.values.astype(int)[0])\n",
148+
"\n",
149+
"city_list"
150+
]
151+
},
152+
{
153+
"cell_type": "code",
154+
"execution_count": null,
155+
"metadata": {},
156+
"outputs": [],
157+
"source": [
158+
"# get area and population\n",
159+
"\n",
160+
"city_pop_area = data[['city','area', 'population', 'president']].drop_duplicates().reset_index().drop(columns = ['index']);\n",
161+
"city_pop_area.index.name = 'id'\n",
162+
"\n",
163+
"city_pop_area = city_pop_area.rename(columns = {'city':'city_id'})\n",
164+
"\n",
165+
"city_pop_area['city_id'] = city_pop_area['city_id'].map(lambda x: city_list[city_list['city'] == x].index.values.astype(int)[0])\n",
166+
"\n",
167+
"city_pop_area"
168+
]
169+
},
170+
{
171+
"cell_type": "code",
172+
"execution_count": null,
173+
"metadata": {},
174+
"outputs": [],
175+
"source": [
176+
"# get city and monument\n",
177+
"\n",
178+
"city_monuments = data[['city', 'monument']].drop_duplicates().dropna().reset_index().drop(columns = ['index']);\n",
179+
"city_monuments.index.name = 'id'\n",
180+
"\n",
181+
"city_monuments = city_monuments.rename(columns = {'city':'city_id'})\n",
182+
"\n",
183+
"city_monuments['city_id'] = city_monuments['city_id'].map(lambda x: city_list[city_list['city'] == x].index.values.astype(int)[0])\n",
184+
"\n",
185+
"city_monuments"
186+
]
187+
},
188+
{
189+
"cell_type": "code",
190+
"execution_count": 1,
191+
"metadata": {},
192+
"outputs": [],
193+
"source": [
194+
"#Table definition and insert data\n",
195+
"\n",
196+
"from sqlalchemy import create_engine\n",
197+
"from sqlalchemy.ext.declarative import declarative_base\n",
198+
"\n",
199+
"db_string = \"postgres://postgres:postgres@127.0.0.1:5432/testAGH\"\n",
200+
"\n",
201+
"engine = create_engine(db_string)\n",
202+
"\n",
203+
"Base = declarative_base()\n",
204+
"\n",
205+
"# Import column structure and constraints\n",
206+
"\n",
207+
"from sqlalchemy import Column, Integer, String, Float, ForeignKey, Sequence, CheckConstraint, UniqueConstraint\n",
208+
"\n",
209+
"class Country(Base):\n",
210+
" __tablename__ = 'countryies'\n",
211+
" __table_args__ = (\n",
212+
" CheckConstraint('length(country) = 3'),\n",
213+
" UniqueConstraint('country'),\n",
214+
" )\n",
215+
" id = Column(Integer, Sequence('seq_country_id'), primary_key = True)\n",
216+
" country = Column(String(50), nullable = False)\n",
217+
"\n",
218+
"class City(Base):\n",
219+
" __tablename__ = 'cities'\n",
220+
" __table_args__ = (\n",
221+
" CheckConstraint('length(city) > 0'),\n",
222+
" )\n",
223+
" id = Column(Integer, Sequence('seq_city_id'), primary_key=True)\n",
224+
" country_id = Column(Integer, ForeignKey('countryies.id'))\n",
225+
" city = Column(String, nullable = False)\n",
226+
" \n",
227+
"class City_data(Base):\n",
228+
" __tablename__ = 'city_data'\n",
229+
" __table_args__ = (\n",
230+
" CheckConstraint('area > 0'),\n",
231+
" CheckConstraint('population >= 0')\n",
232+
" )\n",
233+
" id = Column(Integer, Sequence('seq_city_data_id'), primary_key=True )\n",
234+
" city_id = Column(Integer, ForeignKey('cities.id'))\n",
235+
" area = Column(Float, nullable = False, default=0)\n",
236+
" population = Column(Integer, nullable = False, default=0)\n",
237+
" president = Column(String(60), nullable = True, default='')\n",
238+
" \n",
239+
"class Monument(Base):\n",
240+
" __tablename__ = 'monuments'\n",
241+
" __table_args__ = (\n",
242+
" CheckConstraint('length(monument) > 0'),\n",
243+
" )\n",
244+
" id = Column(Integer, Sequence('seq_monument_id'), primary_key=True )\n",
245+
" city_id = Column(Integer, ForeignKey('cities.id'))\n",
246+
" monument = Column(String(100), nullable = True)\n",
247+
"\n",
248+
"Base.metadata.create_all(engine)"
249+
]
250+
},
251+
{
252+
"cell_type": "code",
253+
"execution_count": null,
254+
"metadata": {},
255+
"outputs": [],
256+
"source": [
257+
"country_list.to_sql('countryies',engine, if_exists='append')\n",
258+
"city_list.to_sql('cities',engine, if_exists='append')\n",
259+
"city_pop_area.to_sql('city_data',engine, if_exists='append')\n",
260+
"city_monuments.to_sql('monuments',engine, if_exists='append')"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": null,
266+
"metadata": {},
267+
"outputs": [],
268+
"source": []
269+
}
270+
],
271+
"metadata": {
272+
"kernelspec": {
273+
"name": "python37664bit8db317473b104a8fa7579acc07e75f1f",
274+
"display_name": "Python 3.7.6 64-bit"
275+
},
276+
"language_info": {
277+
"codemirror_mode": {
278+
"name": "ipython",
279+
"version": 3
280+
},
281+
"file_extension": ".py",
282+
"mimetype": "text/x-python",
283+
"name": "python",
284+
"nbconvert_exporter": "python",
285+
"pygments_lexer": "ipython3",
286+
"version": "3.7.6"
287+
},
288+
"metadata": {
289+
"interpreter": {
290+
"hash": "9164a3399a70d355c381b62813f30880ed90ca5a6f321bf0d85375640bda7ee5"
291+
}
292+
}
293+
},
294+
"nbformat": 4,
295+
"nbformat_minor": 2
296+
}

0 commit comments

Comments
 (0)