GAN Experiments for Synthetic Acceleration Data Generation #17

Open · wants to merge 5 commits into main
122 changes: 122 additions & 0 deletions user_tools/dataVisualiser/label_generator.py
@@ -0,0 +1,122 @@
import pandas as pd
import numpy as np
import json

class LabelGenerator:
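    """Build a labelled sensor-data DataFrame from a seizure-event JSON export.

    Each datapoint row gets an FFT magnitude column, a timestep (one row is
    treated as a 5-second window) and a binary seizure label derived from
    ALARM events and their seizure-time offsets.
    """
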
def __init__(self, file_path, sampling_rate=25):
self.file_path = file_path # Path to the JSON file
self.sampling_rate = sampling_rate # Sampling rate (Hz)
self.df_sensordata = None # To store the processed DataFrame

def load_data(self):
"""Load and flatten the JSON data into a DataFrame."""
with open(self.file_path, 'r') as file:
raw_json = json.load(file)

flattened_data = []
for attribute in raw_json:
user_id = attribute.get('userId', None)
seizure_times = attribute.get('seizureTimes', [])
datapoints = attribute.get('datapoints', [])

for point in datapoints:
event_id = point.get('eventId', None)
hr = point.get('hr', [])
o2Sat = point.get('o2Sat', [])
rawData = point.get('rawData', [])
rawData3D = point.get('rawData3D', [])
alarmPhrase = point.get('alarmPhrase', None)
flattened_data.append({
'eventId': event_id,
'userId': user_id,
'hr': hr,
'o2Sat': o2Sat,
'rawData': rawData,
'rawData3D': rawData3D,
'seizure_times': seizure_times,
'alarmPhrase': alarmPhrase
})

# Convert to DataFrame
self.df_sensordata = pd.DataFrame(flattened_data)

def calculate_fft(self, raw_data):
"""Calculate FFT for the raw data."""
raw_data = raw_data - np.mean(raw_data) # Remove the DC component
fft_result = np.fft.fft(raw_data) # Compute FFT
frequencies = np.fft.fftfreq(len(raw_data), d=1/self.sampling_rate) # Compute frequencies
fft_magnitude = np.abs(fft_result) # Compute the magnitude
positive_frequencies = frequencies[:len(frequencies)//2] # Only positive frequencies
positive_fft_magnitude = fft_magnitude[:len(frequencies)//2] # Only positive FFT magnitudes
return positive_frequencies, positive_fft_magnitude

def add_fft_column(self):
"""Add an FFT column to the DataFrame."""
fft_results = []
for _, row in self.df_sensordata.iterrows():
raw_data = np.array(row['rawData'])
_, positive_fft_magnitude = self.calculate_fft(raw_data) # Calculate FFT for the row
fft_results.append(list(positive_fft_magnitude)) # Append FFT result
self.df_sensordata['FFT'] = fft_results

def add_timestep_and_label(self):
"""Add timestep and label columns to the DataFrame."""
        # Add 'timestep' column (each row is assumed to cover a 5-second window)
self.df_sensordata['timestep'] = self.df_sensordata.index * 5

# Add 'label' column, initialized to 0
self.df_sensordata['label'] = 0

def label_alarm_events(self):
"""Label the data based on alarm events."""
for idx, row in self.df_sensordata.iterrows():
if row['alarmPhrase'] == 'ALARM': # If alarmPhrase is ALARM
alarm_time = row['timestep']
seizure_times = row['seizure_times']
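                # seizure_times holds offsets in seconds relative to the alarm time:
                # a negative offset marks a window before the alarm, a positive one
                # a window after it.  Worked example (hypothetical values): an alarm
                # at t=100 s with seizure_times=[-20, 30] labels every row whose
                # timestep falls in 80-130 s.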

                # Apply each seizure-time offset to the alarm time
for seizure in seizure_times:
start_time = alarm_time + seizure # Adjust by the seizure offset

                    # For a negative offset, label rows from start_time up to (but not including) the alarm
before_idx = self.df_sensordata[(self.df_sensordata['timestep'] >= start_time) &
(self.df_sensordata['timestep'] < alarm_time)].index
self.df_sensordata.loc[before_idx, 'label'] = 1 # Mark as seizure (1)

                    # For a positive offset, label rows from the alarm up to and including start_time
after_idx = self.df_sensordata[(self.df_sensordata['timestep'] >= alarm_time) &
(self.df_sensordata['timestep'] <= start_time)].index
self.df_sensordata.loc[after_idx, 'label'] = 1 # Mark as seizure (1)

def process_data(self):
"""Process the data through all stages and return the final DataFrame."""
# Step 1: Load the data
self.load_data()

# Step 2: Add FFT column
self.add_fft_column()

# Step 3: Add timestep and label columns
self.add_timestep_and_label()

# Step 4: Label based on alarm events
self.label_alarm_events()

# Step 5: Drop the 'seizure_times' column
self.df_sensordata.drop(columns=['seizure_times'], inplace=True)

return self.df_sensordata


# Example usage
file_path = '../../tests/testData/testDataVisualisation.json' # Replace with your JSON file path
processor = LabelGenerator(file_path)

# Process the data and get the resulting DataFrame
df_result = processor.process_data()

# Optionally save the DataFrame to a CSV file (the generatedCsvDatasets directory must already exist)
df_result.to_csv('generatedCsvDatasets/sensordata_labeled.csv', index=False)

# Display the first 30 rows of the processed DataFrame
print(df_result.head(30))
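
A quick way to sanity-check the generated FFT column (a minimal sketch, assuming the df_result produced by the example usage above; np.fft.rfft is used here as an independent reference):

import numpy as np

# Recompute the magnitude spectrum of the first window with rfft and compare it
# to the stored 'FFT' column.  For real input of length n, rfft returns
# n//2 + 1 bins, so only the first len(stored) bins are compared.
first = df_result.iloc[0]
raw = np.asarray(first['rawData'], dtype=float)
reference = np.abs(np.fft.rfft(raw - raw.mean()))  # same DC removal as calculate_fft
stored = np.asarray(first['FFT'])
assert np.allclose(reference[:len(stored)], stored)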
1 change: 0 additions & 1 deletion user_tools/scripts/ConvolutionalNeuralNetwork.v1.24.ipynb
@@ -1008,7 +1008,6 @@
],
"source": [
"# Print Lables and features to see if they have same value\n",
"\n",
"#Labels for the train dataset\n",
"train_labels = df[[\"outcome\"]]\n",
"print(train_labels.shape)\n",
254 changes: 254 additions & 0 deletions user_tools/scripts/DataLoader_OSDB_version1.ipynb
@@ -0,0 +1,254 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Core Imports\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import warnings\n",
"from itertools import cycle\n",
"\n",
"# Ignore warnings in Python\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Id</th>\n",
" <th>eventId</th>\n",
" <th>userId</th>\n",
" <th>type</th>\n",
" <th>subType</th>\n",
" <th>hr</th>\n",
" <th>rawData</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9291</th>\n",
" <td>9292</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1004.324016377599, 1005.6113176704044, 1008.3...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9292</th>\n",
" <td>9293</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1000.1011495342927, 1004.0482531052368, 1000....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9293</th>\n",
" <td>9294</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1001.1701269235376, 1002.7462560430771, 998.6...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9294</th>\n",
" <td>9295</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1012.4248508971073, 1004.5334132492027, 1003....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9295</th>\n",
" <td>9296</td>\n",
" <td>14898</td>\n",
" <td>156</td>\n",
" <td>Fall</td>\n",
" <td>Uncontrolled</td>\n",
" <td>0</td>\n",
" <td>[1004.9762153019517, 1000.7225907604744, 1005....</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Id eventId userId type subType hr \\\n",
"9291 9292 14898 156 Fall Uncontrolled 0 \n",
"9292 9293 14898 156 Fall Uncontrolled 0 \n",
"9293 9294 14898 156 Fall Uncontrolled 0 \n",
"9294 9295 14898 156 Fall Uncontrolled 0 \n",
"9295 9296 14898 156 Fall Uncontrolled 0 \n",
"\n",
" rawData \n",
"9291 [1004.324016377599, 1005.6113176704044, 1008.3... \n",
"9292 [1000.1011495342927, 1004.0482531052368, 1000.... \n",
"9293 [1001.1701269235376, 1002.7462560430771, 998.6... \n",
"9294 [1012.4248508971073, 1004.5334132492027, 1003.... \n",
"9295 [1004.9762153019517, 1000.7225907604744, 1005.... "
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"import pandas as pd\n",
"\n",
"# List of file paths\n",
"file_paths = [\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_allSeizures.json',\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_falseAlarms.json',\n",
" 'C:/Users/jamie/Desktop/PhD Research/Experimental Research Code + Datasets/Experimental Research Code + Datasets/Early Version of Open Seizure Database/Open Seizure Database/osdb_3min_fallEvents.json'\n",
"]\n",
"\n",
"# Initialize an empty list to store data from all files\n",
"all_rows = []\n",
"current_id = 1 # Start Id counter\n",
"\n",
"# Process each file\n",
"for file_path in file_paths:\n",
" with open(file_path, 'r') as file:\n",
" data = json.load(file)\n",
"\n",
" # Extract data from the JSON file\n",
" for event in data:\n",
" event_id = event.get(\"id\")\n",
" user_id = event.get(\"userId\")\n",
" event_type = event.get(\"type\")\n",
" sub_type = event.get(\"subType\")\n",
" hr1 = event.get('hr')\n",
"\n",
"\n",
" # Loop through each datapoint within the event\n",
" for datapoint in event.get(\"datapoints\", []):\n",
" raw_data = None\n",
"\n",
" # Extract rawData from the nested dataJSON field\n",
" try:\n",
" nested_data = json.loads(datapoint.get(\"dataJSON\", \"{}\"))\n",
" nested_inner_data = json.loads(nested_data.get(\"dataJSON\", \"{}\"))\n",
" hr = nested_inner_data.get(\"hr\")\n",
" raw_data = nested_inner_data.get(\"rawData\")\n",
" except json.JSONDecodeError:\n",
" pass\n",
"\n",
" # Append the extracted data with the current Id\n",
" all_rows.append({\n",
" \"Id\": current_id,\n",
" \"eventId\": event_id,\n",
" \"userId\": user_id,\n",
" \"type\": event_type,\n",
" \"subType\": sub_type,\n",
" \"hr\": hr,\n",
" \"rawData\": raw_data\n",
" })\n",
" current_id += 1 # Increment Id\n",
"\n",
"# Create a single DataFrame\n",
"df = pd.DataFrame(all_rows)\n",
"\n",
"# Display the DataFrame\n",
"df.tail()"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unique subTypes after filtering, renaming, and replacing spaces: ['Other_Seizure' 'Tonic-Clonic' 'Aura' 'Typing' 'Pushing_Pram/Wheelchair'\n",
" 'Washing_/_cleaning' 'Other_False_Alarm' 'Talking' 'Brushing_Teeth'\n",
" 'Sorting' 'Motor_Vehicle' 'Cycling' 'Computer_Games' 'Fall']\n"
]
}
],
"source": [
"# Define the subType values to drop\n",
"subtypes_to_drop = [None, 'null', '', 'Unknown']\n",
"\n",
"# Identify eventIds that have these subType values\n",
"event_ids_to_drop = df[df['subType'].isin(subtypes_to_drop)]['eventId'].unique()\n",
"\n",
"# Filter out rows with these eventIds\n",
"df_filtered = df[~df['eventId'].isin(event_ids_to_drop)]\n",
"\n",
"# Rename 'Uncontrolled' to 'Fall'\n",
"#df_filtered['subType'] = df_filtered['subType'].replace('Uncontrolled', 'Fall')\n",
"df_filtered.loc[df_filtered['subType'] == 'Other', 'subType'] = 'Other_Seizure'\n",
"df_filtered.loc[df_filtered['subType'] == 'Uncontrolled', 'subType'] = 'Fall'\n",
"df_filtered.loc[df_filtered['subType'] == 'Other (Please describe in notes)', 'subType'] = 'Other_False_Alarm'\n",
"\n",
"# Replace spaces with underscores in the 'subType' column\n",
"df_filtered.loc[:, 'subType'] = df_filtered['subType'].str.replace(' ', '_')\n",
"\n",
"# Print all unique subType values after replacement\n",
"unique_subtypes = df_filtered['subType'].unique()\n",
"print(\"Unique subTypes after filtering, renaming, and replacing spaces:\", unique_subtypes)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}