fixed score calculator and added total score calculation

UdayLab · Oct 29, 2024 · d1a18a0 · d1a18a0
1 parent 6b28609
commit d1a18a0
Show file tree

Hide file tree

Showing 6 changed files with 96 additions and 46 deletions.
diff --git a/geoAnalytics.zip b/geoAnalytics.zip
diff --git a/geoAnalytics/__pycache__/repository.cpython-312.pyc b/geoAnalytics/__pycache__/repository.cpython-312.pyc
diff --git a/geoAnalytics/__pycache__/scoreCalculator.cpython-312.pyc b/geoAnalytics/__pycache__/scoreCalculator.cpython-312.pyc
diff --git a/geoAnalytics/repository.py b/geoAnalytics/repository.py
@@ -328,6 +328,8 @@ def filter(self, filterFile):
 
     def calculate_scores_for_row(self, row):
+        """
+        Score every data column of a single row.
+
+        The first two entries of ``row`` (x and y) are skipped; the value at
+        position ``j`` (j >= 2) is scored by ``self.scores[j - 2]``.
+
+        :param row: one row of values (a pandas Series, list or tuple)
+        :return: list with one score per column, from the 3rd column onward
+        :raises IndexError: if the row has more data columns than ``self.scores``
+        """
+        # Read the row strictly by position. Indexing a Series with integer keys
+        # (row[j]) is deprecated in pandas and emits a FutureWarning, because
+        # integer keys will be treated as labels in a future version.
+        values = list(row)[2:]
+        return [self.scores[i].calculate_score(value) for i, value in enumerate(values)]
 
 
@@ -344,6 +346,14 @@ def filtering(self, dataframe, filterFile):
         dataframe[[f'score_{i}' for i in range(score_columns.shape[1])]] = score_columns
 
         return dataframe
+
+    def total_score(self, dataframe):
+        """
+        Calculate the total score for the dataframe.
+
+        Adds a ``total_score`` column holding the sum of all ``score_<i>``
+        columns divided by the number of score columns.
+
+        :param dataframe: DataFrame containing columns named score_0, score_1, ...
+        :return: the same dataframe, with the 'total_score' column added
+        """
+        # count the score columns ('total_score' itself does not contain 'score_')
+        num_score_cols = sum('score_' in col for col in dataframe.columns)
+
+        # score columns are numbered from 0, so the range must start at 0;
+        # starting at 1 silently drops score_0 from the sum
+        score_cols = [f'score_{i}' for i in range(num_score_cols)]
+        dataframe['total_score'] = dataframe[score_cols].sum(axis=1) / num_score_cols
 
-
-
+        return dataframe
diff --git a/geoAnalytics/scoreCalculator.py b/geoAnalytics/scoreCalculator.py
@@ -3,14 +3,18 @@ def __init__(self, min_val, max_val, avg_val):
         self.min_val = min_val
         self.max_val = max_val
         self.avg_val = avg_val
-        # Precompute the maximum possible distance from the average
-        self.max_distance = max(max_val - avg_val, avg_val - min_val)
-
+
     def calculate_score(self, value):
+        """
+        Score ``value`` by its position relative to min_val, avg_val and max_val.
+
+        Returns 0 when value equals avg_val, 0.5 when it equals min_val or
+        max_val, 1 when it lies outside [min_val, max_val], and otherwise a
+        linearly interpolated score between 0 (at avg_val) and 0.5 (at the
+        nearer bound).
+        """
-        if value < self.min_val or value > self.max_val:
+        if value == self.avg_val:
+            return 0
+        elif value == self.min_val or value == self.max_val:
+            return 0.5
+        elif value > self.max_val or value < self.min_val:
             return 1
+        elif value < self.avg_val:
+            # Linear interpolation between min_val and avg_val
+            return 0.5 * (1 - (value - self.min_val) / (self.avg_val - self.min_val))
         else:
-            # Use the precomputed max distance
-            distance_from_avg = abs(value - self.avg_val)
-            score = distance_from_avg / self.max_distance
-            return score
+            # Linear interpolation between avg_val and max_val
+            return 0.5 * (1 - (self.max_val - value) / (self.max_val - self.avg_val))
+
diff --git a/geoAnalytics/testCase.ipynb b/geoAnalytics/testCase.ipynb
@@ -77,18 +77,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "id": "139f3edf",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:303: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
-      "  df = pd.read_sql(sql, self.conn)\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -108,6 +100,14 @@
       "\n",
       "[27000 rows x 5 columns]\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:303: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n",
+      "  df = pd.read_sql(sql, self.conn)\n"
+     ]
     }
    ],
    "source": [
@@ -117,15 +117,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 4,
    "id": "d51ff67e",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:331: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
+      "/Users/tarunsreepada/Github/geoAnalytics/geoAnalytics/repository.py:333: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
       "  return [self.scores[j - 2].calculate_score(row[j]) for j in range(2, len(row))]\n"
      ]
     },
@@ -168,9 +168,9 @@
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.086957</td>\n",
-       "      <td>0.626866</td>\n",
-       "      <td>0.386503</td>\n",
+       "      <td>0.068966</td>\n",
+       "      <td>0.313433</td>\n",
+       "      <td>0.193252</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -179,9 +179,9 @@
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.086957</td>\n",
-       "      <td>0.626866</td>\n",
-       "      <td>0.386503</td>\n",
+       "      <td>0.068966</td>\n",
+       "      <td>0.313433</td>\n",
+       "      <td>0.193252</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -190,9 +190,9 @@
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.086957</td>\n",
-       "      <td>0.626866</td>\n",
-       "      <td>0.386503</td>\n",
+       "      <td>0.068966</td>\n",
+       "      <td>0.313433</td>\n",
+       "      <td>0.193252</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -201,9 +201,9 @@
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.086957</td>\n",
-       "      <td>0.626866</td>\n",
-       "      <td>0.386503</td>\n",
+       "      <td>0.068966</td>\n",
+       "      <td>0.313433</td>\n",
+       "      <td>0.193252</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -212,9 +212,9 @@
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.086957</td>\n",
-       "      <td>0.626866</td>\n",
-       "      <td>0.386503</td>\n",
+       "      <td>0.068966</td>\n",
+       "      <td>0.313433</td>\n",
+       "      <td>0.193252</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -289,11 +289,11 @@
       ],
       "text/plain": [
        "           x     y       b1       b2       b3   score_0   score_1   score_2\n",
-       "0        0.5  89.5      0.0      0.0      0.0  0.086957  0.626866  0.386503\n",
-       "1        1.5  89.5      0.0      0.0      0.0  0.086957  0.626866  0.386503\n",
-       "2        2.5  89.5      0.0      0.0      0.0  0.086957  0.626866  0.386503\n",
-       "3        3.5  89.5      0.0      0.0      0.0  0.086957  0.626866  0.386503\n",
-       "4        4.5  89.5      0.0      0.0      0.0  0.086957  0.626866  0.386503\n",
+       "0        0.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252\n",
+       "1        1.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252\n",
+       "2        2.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252\n",
+       "3        3.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252\n",
+       "4        4.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252\n",
        "...      ...   ...      ...      ...      ...       ...       ...       ...\n",
        "26995  295.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000\n",
        "26996  296.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000\n",
@@ -304,7 +304,7 @@
        "[27000 rows x 8 columns]"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -315,11 +315,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "d17f1842",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "           x     y       b1       b2       b3   score_0   score_1   score_2  \\\n",
+      "0        0.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252   \n",
+      "1        1.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252   \n",
+      "2        2.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252   \n",
+      "3        3.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252   \n",
+      "4        4.5  89.5      0.0      0.0      0.0  0.068966  0.313433  0.193252   \n",
+      "...      ...   ...      ...      ...      ...       ...       ...       ...   \n",
+      "26995  295.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000   \n",
+      "26996  296.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000   \n",
+      "26997  297.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000   \n",
+      "26998  298.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000   \n",
+      "26999  299.5   0.5  65534.0  65534.0  65534.0  1.000000  1.000000  1.000000   \n",
+      "\n",
+      "       total_score  \n",
+      "0         0.168895  \n",
+      "1         0.168895  \n",
+      "2         0.168895  \n",
+      "3         0.168895  \n",
+      "4         0.168895  \n",
+      "...            ...  \n",
+      "26995     0.666667  \n",
+      "26996     0.666667  \n",
+      "26997     0.666667  \n",
+      "26998     0.666667  \n",
+      "26999     0.666667  \n",
+      "\n",
+      "[27000 rows x 9 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(obj.total_score(df))"
+   ]
   },
   {
    "cell_type": "code",
@@ -475,7 +511,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },