Skip to content

Commit

Permalink
fixed score calculator and added total score calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
Tarun-Sreepada committed Oct 29, 2024
1 parent 6b28609 commit d1a18a0
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 46 deletions.
Binary file added geoAnalytics.zip
Binary file not shown.
Binary file modified geoAnalytics/__pycache__/repository.cpython-312.pyc
Binary file not shown.
Binary file modified geoAnalytics/__pycache__/scoreCalculator.cpython-312.pyc
Binary file not shown.
14 changes: 12 additions & 2 deletions geoAnalytics/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ def filter(self, filterFile):

def calculate_scores_for_row(self, row):
    """
    Score every band value of a single row.

    The first two entries of ``row`` are the x and y coordinates and are
    skipped. The value at position ``j`` (``j >= 2``) is scored with the
    score object ``self.scores[j - 2]``.

    :param row: a pandas Series, or a plain sequence such as a list, whose
        first two entries are x and y.
    :return: list with one score per entry after the first two.
    """
    # Select by position. Integer keys on a label-indexed Series (row[j])
    # are deprecated in pandas and emit a FutureWarning for every row, so
    # use .iloc when the row provides it and plain slicing otherwise.
    band_values = row.iloc[2:] if hasattr(row, 'iloc') else row[2:]
    return [
        self.scores[i].calculate_score(value)
        for i, value in enumerate(band_values)
    ]


Expand All @@ -344,6 +346,14 @@ def filtering(self, dataframe, filterFile):
dataframe[[f'score_{i}' for i in range(score_columns.shape[1])]] = score_columns

return dataframe

def total_score(self, dataframe):
    """
    Add a ``total_score`` column holding the average of the score columns.

    The score columns are the ones named ``score_<n>`` (``score_0``,
    ``score_1``, ...). Each row's total is the sum of those columns divided
    by the number of those columns.

    :param dataframe: pandas DataFrame containing the ``score_<n>`` columns.
    :return: the same DataFrame, with the ``total_score`` column added.
    """
    prefix = 'score_'
    # Pick the score columns by name rather than rebuilding the names from a
    # range: the previous range(1, n) skipped score_0 while still dividing
    # by n, which understated every total.
    score_cols = [
        col for col in dataframe.columns
        if isinstance(col, str)
        and col.startswith(prefix)
        and col[len(prefix):].isdigit()
    ]

    dataframe['total_score'] = dataframe[score_cols].sum(axis=1) / len(score_cols)

    return dataframe
20 changes: 12 additions & 8 deletions geoAnalytics/scoreCalculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ def __init__(self, min_val, max_val, avg_val):
self.min_val = min_val
self.max_val = max_val
self.avg_val = avg_val
# Precompute the maximum possible distance from the average
self.max_distance = max(max_val - avg_val, avg_val - min_val)


def calculate_score(self, value):
    """
    Score ``value`` by how far it lies from the average, on a 0..1 scale.

    The score is piecewise linear:

    * ``0`` when ``value`` equals ``self.avg_val``;
    * ``0.5`` when ``value`` equals ``self.min_val`` or ``self.max_val``;
    * ``1`` when ``value`` lies outside ``[self.min_val, self.max_val]``;
    * otherwise a linear interpolation between 0 (at the average) and 0.5
      (at the nearer bound on the same side of the average).

    :param value: the number to score.
    :return: the score, between 0 and 1.
    """
    if value == self.avg_val:
        return 0
    if value == self.min_val or value == self.max_val:
        return 0.5
    if value > self.max_val or value < self.min_val:
        return 1

    # From here on min_val < value < max_val and value != avg_val, so the
    # denominators below are strictly positive.
    if value < self.avg_val:
        # Linear interpolation between min_val (0.5) and avg_val (0).
        return 0.5 * (1 - (value - self.min_val) / (self.avg_val - self.min_val))

    # Linear interpolation between avg_val (0) and max_val (0.5).
    return 0.5 * (1 - (self.max_val - value) / (self.max_val - self.avg_val))

108 changes: 72 additions & 36 deletions geoAnalytics/testCase.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"id": "139f3edf",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:303: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df = pd.read_sql(sql, self.conn)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -108,6 +100,14 @@
"\n",
"[27000 rows x 5 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:303: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
" df = pd.read_sql(sql, self.conn)\n"
]
}
],
"source": [
Expand All @@ -117,15 +117,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 4,
"id": "d51ff67e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:331: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
"/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:333: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
" return [self.scores[j - 2].calculate_score(row[j]) for j in range(2, len(row))]\n"
]
},
Expand Down Expand Up @@ -168,9 +168,9 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086957</td>\n",
" <td>0.626866</td>\n",
" <td>0.386503</td>\n",
" <td>0.068966</td>\n",
" <td>0.313433</td>\n",
" <td>0.193252</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
Expand All @@ -179,9 +179,9 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086957</td>\n",
" <td>0.626866</td>\n",
" <td>0.386503</td>\n",
" <td>0.068966</td>\n",
" <td>0.313433</td>\n",
" <td>0.193252</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
Expand All @@ -190,9 +190,9 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086957</td>\n",
" <td>0.626866</td>\n",
" <td>0.386503</td>\n",
" <td>0.068966</td>\n",
" <td>0.313433</td>\n",
" <td>0.193252</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
Expand All @@ -201,9 +201,9 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086957</td>\n",
" <td>0.626866</td>\n",
" <td>0.386503</td>\n",
" <td>0.068966</td>\n",
" <td>0.313433</td>\n",
" <td>0.193252</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
Expand All @@ -212,9 +212,9 @@
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.086957</td>\n",
" <td>0.626866</td>\n",
" <td>0.386503</td>\n",
" <td>0.068966</td>\n",
" <td>0.313433</td>\n",
" <td>0.193252</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
Expand Down Expand Up @@ -289,11 +289,11 @@
],
"text/plain": [
" x y b1 b2 b3 score_0 score_1 score_2\n",
"0 0.5 89.5 0.0 0.0 0.0 0.086957 0.626866 0.386503\n",
"1 1.5 89.5 0.0 0.0 0.0 0.086957 0.626866 0.386503\n",
"2 2.5 89.5 0.0 0.0 0.0 0.086957 0.626866 0.386503\n",
"3 3.5 89.5 0.0 0.0 0.0 0.086957 0.626866 0.386503\n",
"4 4.5 89.5 0.0 0.0 0.0 0.086957 0.626866 0.386503\n",
"0 0.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252\n",
"1 1.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252\n",
"2 2.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252\n",
"3 3.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252\n",
"4 4.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252\n",
"... ... ... ... ... ... ... ... ...\n",
"26995 295.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000\n",
"26996 296.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000\n",
Expand All @@ -304,7 +304,7 @@
"[27000 rows x 8 columns]"
]
},
"execution_count": 7,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -315,11 +315,47 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "d17f1842",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" x y b1 b2 b3 score_0 score_1 score_2 \\\n",
"0 0.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252 \n",
"1 1.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252 \n",
"2 2.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252 \n",
"3 3.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252 \n",
"4 4.5 89.5 0.0 0.0 0.0 0.068966 0.313433 0.193252 \n",
"... ... ... ... ... ... ... ... ... \n",
"26995 295.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000 \n",
"26996 296.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000 \n",
"26997 297.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000 \n",
"26998 298.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000 \n",
"26999 299.5 0.5 65534.0 65534.0 65534.0 1.000000 1.000000 1.000000 \n",
"\n",
" total_score \n",
"0 0.168895 \n",
"1 0.168895 \n",
"2 0.168895 \n",
"3 0.168895 \n",
"4 0.168895 \n",
"... ... \n",
"26995 0.666667 \n",
"26996 0.666667 \n",
"26997 0.666667 \n",
"26998 0.666667 \n",
"26999 0.666667 \n",
"\n",
"[27000 rows x 9 columns]\n"
]
}
],
"source": [
"print(obj.total_score(df))"
]
},
{
"cell_type": "code",
Expand Down Expand Up @@ -475,7 +511,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand Down

0 comments on commit d1a18a0

Please sign in to comment.