GAN Experiments for Synthetic Acceleration Data Generation #17

Open · wants to merge 5 commits into main
122 changes: 122 additions & 0 deletions user_tools/dataVisualiser/label_generator.py
@@ -0,0 +1,122 @@
import pandas as pd
import numpy as np
import json

class LabelGenerator:
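    """Build a labelled sensor-data DataFrame from a seizure-event JSON export.

    Each datapoint row gets an FFT magnitude column, a timestep (one row is
    treated as a 5-second window) and a binary seizure label derived from
    ALARM events and their seizure-time offsets.
    """
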
def __init__(self, file_path, sampling_rate=25):
self.file_path = file_path # Path to the JSON file
self.sampling_rate = sampling_rate # Sampling rate (Hz)
self.df_sensordata = None # To store the processed DataFrame

def load_data(self):
"""Load and flatten the JSON data into a DataFrame."""
with open(self.file_path, 'r') as file:
raw_json = json.load(file)

flattened_data = []
for attribute in raw_json:
user_id = attribute.get('userId', None)
seizure_times = attribute.get('seizureTimes', [])
datapoints = attribute.get('datapoints', [])

for point in datapoints:
event_id = point.get('eventId', None)
hr = point.get('hr', [])
o2Sat = point.get('o2Sat', [])
rawData = point.get('rawData', [])
rawData3D = point.get('rawData3D', [])
alarmPhrase = point.get('alarmPhrase', None)
flattened_data.append({
'eventId': event_id,
'userId': user_id,
'hr': hr,
'o2Sat': o2Sat,
'rawData': rawData,
'rawData3D': rawData3D,
'seizure_times': seizure_times,
'alarmPhrase': alarmPhrase
})

# Convert to DataFrame
self.df_sensordata = pd.DataFrame(flattened_data)

def calculate_fft(self, raw_data):
"""Calculate FFT for the raw data."""
raw_data = raw_data - np.mean(raw_data) # Remove the DC component
fft_result = np.fft.fft(raw_data) # Compute FFT
frequencies = np.fft.fftfreq(len(raw_data), d=1/self.sampling_rate) # Compute frequencies
fft_magnitude = np.abs(fft_result) # Compute the magnitude
positive_frequencies = frequencies[:len(frequencies)//2] # Only positive frequencies
positive_fft_magnitude = fft_magnitude[:len(frequencies)//2] # Only positive FFT magnitudes
return positive_frequencies, positive_fft_magnitude

def add_fft_column(self):
"""Add an FFT column to the DataFrame."""
fft_results = []
for _, row in self.df_sensordata.iterrows():
raw_data = np.array(row['rawData'])
_, positive_fft_magnitude = self.calculate_fft(raw_data) # Calculate FFT for the row
fft_results.append(list(positive_fft_magnitude)) # Append FFT result
self.df_sensordata['FFT'] = fft_results

def add_timestep_and_label(self):
"""Add timestep and label columns to the DataFrame."""
        # Add 'timestep' column (each row is assumed to cover a 5-second window)
self.df_sensordata['timestep'] = self.df_sensordata.index * 5

# Add 'label' column, initialized to 0
self.df_sensordata['label'] = 0

def label_alarm_events(self):
"""Label the data based on alarm events."""
for idx, row in self.df_sensordata.iterrows():
if row['alarmPhrase'] == 'ALARM': # If alarmPhrase is ALARM
alarm_time = row['timestep']
seizure_times = row['seizure_times']
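                # seizure_times holds offsets in seconds relative to the alarm time:
                # a negative offset marks a window before the alarm, a positive one
                # a window after it.  Worked example (hypothetical values): an alarm
                # at t=100 s with seizure_times=[-20, 30] labels every row whose
                # timestep falls in 80-130 s.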

                # Apply each seizure-time offset to the alarm time
for seizure in seizure_times:
start_time = alarm_time + seizure # Adjust by the seizure offset

                    # For a negative offset, label rows from start_time up to (but not including) the alarm
before_idx = self.df_sensordata[(self.df_sensordata['timestep'] >= start_time) &
(self.df_sensordata['timestep'] < alarm_time)].index
self.df_sensordata.loc[before_idx, 'label'] = 1 # Mark as seizure (1)

                    # For a positive offset, label rows from the alarm up to and including start_time
after_idx = self.df_sensordata[(self.df_sensordata['timestep'] >= alarm_time) &
(self.df_sensordata['timestep'] <= start_time)].index
self.df_sensordata.loc[after_idx, 'label'] = 1 # Mark as seizure (1)

def process_data(self):
"""Process the data through all stages and return the final DataFrame."""
# Step 1: Load the data
self.load_data()

# Step 2: Add FFT column
self.add_fft_column()

# Step 3: Add timestep and label columns
self.add_timestep_and_label()

# Step 4: Label based on alarm events
self.label_alarm_events()

# Step 5: Drop the 'seizure_times' column
self.df_sensordata.drop(columns=['seizure_times'], inplace=True)

return self.df_sensordata


# Example usage
file_path = '../../tests/testData/testDataVisualisation.json' # Replace with your JSON file path
processor = LabelGenerator(file_path)

# Process the data and get the resulting DataFrame
df_result = processor.process_data()

# Optionally save the DataFrame to a CSV file (the generatedCsvDatasets directory must already exist)
df_result.to_csv('generatedCsvDatasets/sensordata_labeled.csv', index=False)

# Display the first 30 rows of the processed DataFrame
print(df_result.head(30))
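
A quick way to sanity-check the generated FFT column (a minimal sketch, assuming the df_result produced by the example usage above; np.fft.rfft is used here as an independent reference):

import numpy as np

# Recompute the magnitude spectrum of the first window with rfft and compare it
# to the stored 'FFT' column.  For real input of length n, rfft returns
# n//2 + 1 bins, so only the first len(stored) bins are compared.
first = df_result.iloc[0]
raw = np.asarray(first['rawData'], dtype=float)
reference = np.abs(np.fft.rfft(raw - raw.mean()))  # same DC removal as calculate_fft
stored = np.asarray(first['FFT'])
assert np.allclose(reference[:len(stored)], stored)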
1 change: 0 additions & 1 deletion user_tools/scripts/ConvolutionalNeuralNetwork.v1.24.ipynb
@@ -1008,7 +1008,6 @@
],
"source": [
"# Print Lables and features to see if they have same value\n",
"\n",
"#Labels for the train dataset\n",
"train_labels = df[[\"outcome\"]]\n",
"print(train_labels.shape)\n",
254 changes: 254 additions & 0 deletions user_tools/scripts/DataLoader_OSDB_version1.ipynb
@@ -0,0 +1,254 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Core Imports\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import warnings\n",
"from itertools import cycle\n",
"\n",
"# Ignore warnings in Python\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>eventId</th>\n",
" <th>userId</th>\n",
" <th>type</th>\n",
" <th>subType</th>\n",
" <th>hr</th>\n",
" <th>rawData</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9291</th>\n",
" <td>9292</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1004.324016377599, 1005.6113176704044, 1008.3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9292</th>\n",
" <td>9293</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1000.1011495342927, 1004.0482531052368, 1000....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9293</th>\n",
" <td>9294</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1001.1701269235376, 1002.7462560430771, 998.6...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9294</th>\n",
" <td>9295</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1012.4248508971073, 1004.5334132492027, 1003....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9295</th>\n",
" <td>9296</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1004.9762153019517, 1000.7225907604744, 1005....</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id eventId userId type subType hr \\\n",
"9291 9292 14898 156 Fall Uncontrolled 0 \n",
"9292 9293 14898 156 Fall Uncontrolled 0 \n",
"9293 9294 14898 156 Fall Uncontrolled 0 \n",
"9294 9295 14898 156 Fall Uncontrolled 0 \n",
"9295 9296 14898 156 Fall Uncontrolled 0 \n",
"\n",
" rawData \n",
"9291 [1004.324016377599, 1005.6113176704044, 1008.3... \n",
"9292 [1000.1011495342927, 1004.0482531052368, 1000.... \n",
"9293 [1001.1701269235376, 1002.7462560430771, 998.6... \n",
"9294 [1012.4248508971073, 1004.5334132492027, 1003.... \n",
"9295 [1004.9762153019517, 1000.7225907604744, 1005.... "
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"import pandas as pd\n",
"\n",
"# List of file paths\n",
"file_paths = [\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_allSeizures.json',\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_falseAlarms.json',\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_fallEvents.json'\n",
"]\n",
"\n",
"# Initialize an empty list to store data from all files\n",
"all_rows = []\n",
"current_id = 1 # Start Id counter\n",
"\n",
"# Process each file\n",
"for file_path in file_paths:\n",
" with open(file_path, 'r') as file:\n",
" data = json.load(file)\n",
"\n",
" # Extract data from the JSON file\n",
" for event in data:\n",
" event_id = event.get(\"id\")\n",
" user_id = event.get(\"userId\")\n",
" event_type = event.get(\"type\")\n",
" sub_type = event.get(\"subType\")\n",
" hr1 = event.get('hr')\n",
"\n",
"\n",
" # Loop through each datapoint within the event\n",
" for datapoint in event.get(\"datapoints\", []):\n",
" raw_data = None\n",
"\n",
" # Extract rawData from the nested dataJSON field\n",
" try:\n",
" nested_data = json.loads(datapoint.get(\"dataJSON\", \"{}\"))\n",
" nested_inner_data = json.loads(nested_data.get(\"dataJSON\", \"{}\"))\n",
" hr = nested_inner_data.get(\"hr\")\n",
" raw_data = nested_inner_data.get(\"rawData\")\n",
" except json.JSONDecodeError:\n",
" pass\n",
"\n",
" # Append the extracted data with the current Id\n",
" all_rows.append({\n",
" \"Id\": current_id,\n",
" \"eventId\": event_id,\n",
" \"userId\": user_id,\n",
" \"type\": event_type,\n",
" \"subType\": sub_type,\n",
" \"hr\": hr,\n",
" \"rawData\": raw_data\n",
" })\n",
" current_id += 1 # Increment Id\n",
"\n",
"# Create a single DataFrame\n",
"df = pd.DataFrame(all_rows)\n",
"\n",
"# Display the DataFrame\n",
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unique subTypes after filtering, renaming, and replacing spaces: ['Other_Seizure' 'Tonic-Clonic' 'Aura' 'Typing' 'Pushing_Pram/Wheelchair'\n",
" 'Washing_/_cleaning' 'Other_False_Alarm' 'Talking' 'Brushing_Teeth'\n",
" 'Sorting' 'Motor_Vehicle' 'Cycling' 'Computer_Games' 'Fall']\n"
]
}
],
"source": [
"# Define the subType values to drop\n",
"subtypes_to_drop = [None, 'null', '', 'Unknown']\n",
"\n",
"# Identify eventIds that have these subType values\n",
"event_ids_to_drop = df[df['subType'].isin(subtypes_to_drop)]['eventId'].unique()\n",
"\n",
"# Filter out rows with these eventIds\n",
"df_filtered = df[~df['eventId'].isin(event_ids_to_drop)]\n",
"\n",
"# Rename 'Uncontrolled' to 'Fall'\n",
"#df_filtered['subType'] = df_filtered['subType'].replace('Uncontrolled', 'Fall')\n",
"df_filtered.loc[df_filtered['subType'] == 'Other', 'subType'] = 'Other_Seizure'\n",
"df_filtered.loc[df_filtered['subType'] == 'Uncontrolled', 'subType'] = 'Fall'\n",
"df_filtered.loc[df_filtered['subType'] == 'Other (Please describe in notes)', 'subType'] = 'Other_False_Alarm'\n",
"\n",
"# Replace spaces with underscores in the 'subType' column\n",
"df_filtered.loc[:, 'subType'] = df_filtered['subType'].str.replace(' ', '_')\n",
"\n",
"# Print all unique subType values after replacement\n",
"unique_subtypes = df_filtered['subType'].unique()\n",
"print(\"Unique subTypes after filtering, renaming, and replacing spaces:\", unique_subtypes)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}