From 0f8e54ba1edb1e20e8aa18087bb60986eff0c19b Mon Sep 17 00:00:00 2001 From: Pavlo Bazilinskyy Date: Wed, 1 Jan 2025 08:38:38 +0200 Subject: [PATCH] Cleanup of ttest and anova --- trust/analysis/analysis.py | 96 ++++++++++++++++++++++++++------------ trust/analysis/heroku.py | 9 ++-- trust/run.py | 12 ++++- 3 files changed, 79 insertions(+), 38 deletions(-) diff --git a/trust/analysis/analysis.py b/trust/analysis/analysis.py index a1aa3409..e3e29ca6 100644 --- a/trust/analysis/analysis.py +++ b/trust/analysis/analysis.py @@ -1992,7 +1992,7 @@ def plot_kp_slider_videos(self, df, y: list, y_legend=None, x=None, events=None, orientation='v', xaxis_slider_title='Stimulus', yaxis_slider_show=False, yaxis_slider_title=None, show_text_labels=False, name_file=None, save_file=True, fig_save_width=1320, legend_x=0.7, legend_y=0.95, fig_save_height=680, font_family=None, - font_size=None, ttest_signals=None): + font_size=None, ttest_signals=None, anova_signals=None): """Plot keypresses with multiple variables as a filter and slider questions for the stimuli. Args: @@ -2026,7 +2026,8 @@ def plot_kp_slider_videos(self, df, y: list, y_legend=None, x=None, events=None, fig_save_height (int, optional): height of figures to be saved. font_family (str, optional): font family to be used across the figure. None = use config value. font_size (int, optional): font size to be used across the figure. None = use config value. - ttest_signals (list, optional): signals to compare with ttest. None = compare all with all. + ttest_signals (list, optional): signals to compare with ttest. None = do not compare. + anova_signals (dict, optional): signals to compare with ANOVA. None = do not compare. """ logger.info('Creating figure keypress+slider for {}.', df.index.tolist()) # calculate times @@ -2137,23 +2138,32 @@ def plot_kp_slider_videos(self, df, y: list, y_legend=None, x=None, events=None, text=text, textposition='auto'), row=1, col=2) # output ttest - for signals in ttest_signals: - # smoothen signal - if self.smoothen_signal: - signal_1 = self.smoothen_filter(signals['signal_1']) - signal_2 = self.smoothen_filter(signals['signal_2']) - # receive significance values - # considering 0.02s is the response input - significance = self.ttest(signal_1=signal_1, - signal_2=signal_2, - paired=signals['paired']) + if ttest_signals: + for signals in ttest_signals: + # # smoothen signal + # if self.smoothen_signal: + # signal_1 = self.smoothen_filter(signals['signal_1']) + # signal_2 = self.smoothen_filter(signals['signal_2']) + # receive significance values + [p_values, significance] = self.ttest(signal_1=signals['signal_1'], + signal_2=signals['signal_2'], + paired=signals['paired']) # add to the plot # todo: @Shadab, plot those stars here based on significance # todo: @Shadab, adjust the ylim with yaxis_kp_range - - # output anova - # self.anova(y) - # output anova + # output ANOVA + if anova_signals: + # # smoothen signal + # if self.smoothen_signal: + # signal_1 = self.smoothen_filter(signals['signal_1']) + # signal_2 = self.smoothen_filter(signals['signal_2']) + # receive significance values + [p_values, significance] = self.anova(signal_1=anova_signals['signal_1'], + signal_2=anova_signals['signal_2'], + signal_3=anova_signals['signal_3']) + # add to the plot + # todo: @Shadab, plot those pluses here based on significance + # todo: @Shadab, adjust the ylim with yaxis_kp_range # update axis fig.update_xaxes(title_text=xaxis_slider_title, row=1, col=2) fig.update_yaxes(title_text=yaxis_slider_title, row=1, col=2) @@ -2760,26 +2770,52 @@ def smoothen_filter(self, signal, type_flter='OneEuroFilter'): return -1 def ttest(self, signal_1, signal_2, type="two-sided", paired=True): - # Convert to numpy arrays if signal_1 and signal_2 are lists + """Summary + + Args: + signal_1 (TYPE): Description + signal_2 (TYPE): Description + type (str, optional): Description + paired (bool, optional): Description + + Returns: + TYPE: Description + """ + # todo: @Shadab, finish the doctring above. + # convert to numpy arrays if signal_1 and signal_2 are lists signal_1 = np.asarray(signal_1) signal_2 = np.asarray(signal_2) - - # Perform t-test for each value (treated as an independent bin) - significance = [] + p_values = [] # record raw p value for each bin + significance = [] # record binary flag (0 or 1) if p value < tr.common.get_configs('p_value')) + # perform t-test for each value (treated as an independent bin) for i in range(len(signal_1)): if paired: t_stat, p_value = ttest_rel([signal_1[i]], [signal_2[i]], axis=-1, alternative=type) else: t_stat, p_value = ttest_ind([signal_1[i]], [signal_2[i]], axis=-1, alternative=type, equal_var=False) - - # Determine significance for this value + # record raw p value + p_values.append(p_value) + # determine significance for this value + # todo: double check based on matlab < or <= significance.append(int(p_value < tr.common.get_configs('p_value'))) + # return raw p values and binary flags for significance for output + return [p_values, significance] - return significance - - def anova(self, signal_type, signal_ego, signal_kp): - # signal_type = list of int, eg: [1,1,0,0] - # signal_ego = list of int, eg: [1,1,0,0] - # signal_kp = list of lists, eg: [[1,1,1,1], [1,1,1,1], [1,1,1,1], [1,1,1,1]] - # return [0,0,0,0,1,0,0] - return + def anova(self, signal_1, signal_2, signal_3): + """Summary + + Args: + signal_1 (TYPE): Description + signal_2 (TYPE): Description + signal_3 (TYPE): Description + + Returns: + TYPE: Description + """ + # convert to numpy arrays if signal_1 and signal_2 are lists + signal_1 = np.asarray(signal_1) + signal_2 = np.asarray(signal_2) + signal_3 = np.asarray(signal_3) + p_values = [] # record raw p value for each bin + significance = [] # record binary flag (0 or 1) if p value < tr.common.get_configs('p_value')) + return [p_values, significance] diff --git a/trust/analysis/heroku.py b/trust/analysis/heroku.py index d23a8a82..e43053b0 100644 --- a/trust/analysis/heroku.py +++ b/trust/analysis/heroku.py @@ -583,16 +583,13 @@ def process_kp(self, filter_length=True): # loop through rows in column for row_index, row in enumerate(col_data): # consider only videos of allowed length - if (video_dur in self.heroku_data.keys() - and filter_length): + if video_dur in self.heroku_data.keys() and filter_length: # extract recorded duration dur = self.heroku_data.iloc[row_index][video_dur] # check if duration is within limits - if (dur < self.mapping['min_dur'][video_id] - or dur > self.mapping['max_dur'][video_id]): + if dur < self.mapping['min_dur'][video_id] or dur > self.mapping['max_dur'][video_id]: # increase counter of filtered videos - logger.debug('Filtered keypress data from ' - + 'video {} of detected ' + logger.debug('Filtered keypress data from video {} of detected ' + 'duration of {} for ' + 'worker {}.', video_id, dur, diff --git a/trust/run.py b/trust/run.py index a4a9ad62..207c80ab 100644 --- a/trust/run.py +++ b/trust/run.py @@ -202,7 +202,7 @@ 'end': end, 'annotation': vert_line_annotations[x]}) # prepare pairs of signals to compare with ttest - ttest_signals = [] # list of dictionaries + # ttest_signals = [] # list of dictionaries # todo: @Shadab, create list of things to compare using ttest here # 0 and 1 = within (paired): https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html # noqa: E501 # 0 and 2 = between: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html @@ -235,6 +235,13 @@ {'signal_1': df.loc['video_' + str(ids[1])]['kp'], # 1 and 3 = between 'signal_2': df.loc['video_' + str(ids[3])]['kp'], 'paired': False}] + # signal_1 = signal_type = list of int, eg: [1,1,0,0] + # signal_2 = signal_ego = list of int, eg: [1,1,0,0] + # signal_3 = signal_kp = list of lists, eg: [[1,1,1,1], [1,1,1,1], [1,1,1,1], [1,1,1,1]] + # prepare signals to compare with ANOVA + anova_signals = {'signal_1': df.loc['video_' + str(ids[0])]['kp'], + 'signal_2': df.loc['video_' + str(ids[0])]['kp'], + 'signal_3': df.loc['video_' + str(ids[0])]['kp']} # plot keypress data and slider questions analysis.plot_kp_slider_videos(df, y=['comfort', 'safety', 'expectation'], @@ -256,7 +263,8 @@ fig_save_width=1600, # preserve ratio 225x152 fig_save_height=1080, # preserve ratio 225x152 name_file='kp_videos_sliders_'+','.join([str(i) for i in ids]), - ttest_signals=ttest_signals) + ttest_signals=ttest_signals, + anova_signals=anova_signals) # keypresses of an individual stimulus for an individual pp analysis.plot_kp_video_pp(mapping, heroku_data,