def _trap_channel(input_format):
    """Return the h5 channel suffix ('1x' or '2x') selected by ``input_format['Trap']``."""
    trap = input_format['Trap']
    if trap == 1:
        return '1x'
    if trap == 0:
        return '2x'
    raise ValueError("input_format['Trap'] must be 0 or 1, got {!r}".format(trap))


def _stack_force_distance(Force, Distance, input_settings, input_format):
    """Return (Force_Distance, Force_Distance_um), running preprocess_RAW only when requested."""
    if input_format['preprocess'] == 1:
        return preprocess_RAW(Force, Distance, input_settings)

    # h5py datasets do not support arithmetic, so materialise both channels
    # as numpy arrays before scaling the distance column.
    force = np.asarray(Force)
    distance = np.asarray(Distance)
    return np.column_stack((force, distance * 1000)), np.column_stack((force, distance))


def read_in_data(file_num, Files, input_settings, input_format):
    """Load one raw force/distance measurement from a CSV or h5 file.

    Parameters
    ----------
    file_num : int
        Index into ``Files`` of the file to load.
    Files : sequence of str
        Paths of the files selected for analysis.
    input_settings : dict
        Analysis settings; ``'data_frequency'`` is read for CSV input and the
        whole dict is handed to ``preprocess_RAW``.
    input_format : dict
        Format flags. Keys read here: ``'CSV'``, ``'HF'``, ``'LF'``, ``'Trap'``
        (1 selects the "1x" channels, 0 the "2x" channels), ``'length_measure'``
        (CSV only: 1 keeps the distance column as is, anything else divides it
        by 1000 -- presumably nm -> um, confirm against the GUI) and
        ``'preprocess'`` (1 runs ``preprocess_RAW``, otherwise the raw columns
        are stacked unchanged).

    Returns
    -------
    tuple
        ``(Force_Distance, Force_Distance_um, Frequency_value, filename_i)``.
        Without preprocessing the distance column of ``Force_Distance`` is the
        loaded distance multiplied by 1000 and ``Force_Distance_um`` holds the
        loaded distance itself. ``filename_i`` is the file name without its
        extension.

    Raises
    ------
    ValueError
        If ``'Trap'`` is neither 0 nor 1, or if none of the ``'CSV'``, ``'HF'``
        and ``'LF'`` flags is set.
    """
    file_path = Path(Files[file_num])
    # equivalent to stripping '.csv' / '.h5' from the name
    filename_i = file_path.stem

    if input_format['CSV'] == 1:
        raw = pd.read_csv(Files[file_num]).to_numpy()
        # access the raw data
        Force = raw[:, 0]
        Distance = raw[:, 1]
        if input_format['length_measure'] != 1:
            Distance = Distance / 1000
        # CSV files carry no sampling information, so the frequency is user input
        Frequency_value = input_settings['data_frequency']
        Force_Distance, Force_Distance_um = _stack_force_distance(Force, Distance, input_settings, input_format)
        return Force_Distance, Force_Distance_um, Frequency_value, filename_i

    with h5py.File(Files[file_num], "r") as f:
        if input_format['HF'] == 1:
            channel = _trap_channel(input_format)
            Force = f.get("Force HF/Force " + channel)
            Distance = f.get("Distance/Piezo Distance")
            # the data frequency is stored as an attribute of the force dataset
            Frequency_value = Force.attrs['Sample rate (Hz)']
            Force_Distance, Force_Distance_um = _stack_force_distance(Force, Distance, input_settings, input_format)

        elif input_format['LF'] == 1:
            channel = _trap_channel(input_format)
            load_force = f.get("Force LF/Force " + channel)
            Force = load_force[:]['Value'][:]
            load_distance = f.get("Distance/Distance " + channel)[:]
            Distance = load_distance['Value'][:]
            Force_Distance, Force_Distance_um = _stack_force_distance(Force, Distance, input_settings, input_format)

            # the data frequency follows from the number of samples and the
            # start/stop time (in ns) of the measurement
            size_F_LF = len(Force)
            stop_time_F_LF = load_force.attrs['Stop time (ns)']
            timestamp_F_LF = load_force.attrs['Start time (ns)']
            Frequency_value = size_F_LF / ((stop_time_F_LF - timestamp_F_LF) / 10**9)

        else:
            raise ValueError("input_format must select one of 'CSV', 'HF' or 'LF'")

    return Force_Distance, Force_Distance_um, Frequency_value, filename_i
def _append_rows(frame, rows):
    """Return *frame* with *rows* appended; stands in for ``DataFrame.append`` (removed in pandas 2.0)."""
    return pd.concat([frame, pd.DataFrame(rows)], ignore_index=True, sort=False)


def _export_step_table(path, title, rows):
    """Append a title line followed by *rows* as a CSV table to the per-curve results file."""
    with open(path, 'a+') as f:
        f.write(title)
    pd.DataFrame(rows).to_csv(path, mode='a', index=False, header=True)


def _search_steps(search, kind, title, export_steps, filename_i, filename_results, search_args):
    """Run one step finder and return ``(steps, step_starts)``; empty step starts on failure."""
    results_list = []
    step_starts = []
    try:
        results, step_starts = search(*search_args)
        results_list = list(results)
        if export_steps:
            _export_step_table(filename_results, title, results_list)
    except Exception:
        step_starts = []
        print("Error in finding steps for file " + str(filename_i) + '\n' 'There was an error in finding ' + kind + ' steps')
    return results_list, step_starts


# open a folder containing raw data and lead through the analysis process
def start_subprocess(analysis_folder, timestamp, Files, input_settings, input_format, export_data, input_fitting, output_q):
    """Analyse every file in ``Files`` and export step and fit results.

    For each file the data is read with ``read_in_data``, optionally split into
    several forward/reverse curves with ``split_H5``, and each curve is trimmed,
    searched for steps in the force and distance derivation, plotted and --
    if requested -- fitted. Results are written per curve and, when
    ``export_data['export_TOTAL'] == 1``, appended to one total results file.

    Parameters
    ----------
    analysis_folder : str
        Folder all result files are written to.
    timestamp : str
        Text inserted into every exported file name.
    Files : sequence of str
        Paths of the raw data files.
    input_settings, input_format, input_fitting : dict
        Settings handed on to the reading, step finding and fitting helpers.
        ``input_settings['F_min']`` and ``input_format['MultiH5']`` are read here.
    export_data : dict
        Export switches; ``'export_TOTAL'``, ``'export_SMOOTH'``,
        ``'export_STEPS'``, ``'export_PLOT'`` and ``'export_FIT'`` are read here.
    output_q : queue-like
        Receives progress and error messages through ``put``.
    """
    # create file to store total results
    export_total = export_data['export_TOTAL'] == 1
    filename_total_results = analysis_folder + '/total_results_' + timestamp + '.csv'

    if export_total:
        with open(filename_total_results, 'w') as f:
            f.write('>Common steps from all curves of the folder:\n')
            head = (
                'filename',
                'orientation',
                'Derivation of',
                'step number',
                'F1',
                'F2',
                'Fc',
                'step start',
                'step end',
                'step length',
                'filename',
                'model',
                'log_likelihood',
                'Lc_ds',
                'Lp_ds',
                'Lp_ds_stderr',
                'St_ds',
                'Lc_ss',
                'Lc_ss_stderr',
                'Lp_ss',
                'St_ss',
                'f_offset',
                'd_offset',
                'Work_(pN*nm)',
                'Work_(kB*T)'
            )
            f.write(','.join(head))
            f.write('\n')

    # columns of the dataframe storing the fitting parameters of one curve
    header_fit = [
        "filename",
        "model",
        "log_likelihood",
        "Lc_ds",
        "Lp_ds",
        "Lp_ds_stderr",
        "St_ds",
        "Lc_ss",
        "Lc_ss_stderr",
        "Lp_ss",
        "St_ss",
        "f_offset",
        "d_offset",
        "Work_(pN*nm)",
        "Work_(kB*T)"
    ]

    export_steps = export_data['export_STEPS'] == 1

    # iterate through the files in the selected folder
    for file_num, _ in enumerate(Files):
        if file_num == 0:
            print('\nHard work ahead!\n')
            output_q.put('Hard work ahead!')

        # proceed differently with h5 and csv files
        Force_Distance, Force_Distance_um, Frequency_value, filename = read_in_data(file_num, Files, input_settings, input_format)

        ###### Detect MultiFiles ######
        curves = [Force_Distance]
        num_fw = 0
        if input_format['MultiH5'] == 1:
            try:
                fw_curves, rv_curves = split_H5(Force_Distance, input_settings, Frequency_value)
                num_fw = len(fw_curves)
                curves = list(fw_curves) + list(rv_curves)
            except Exception:
                print('No Multi-File detected!')
                curves = [Force_Distance]

        num_curves = len(curves)
        # fallback so the per-file report below works even if no curve was returned
        filename_i = filename

        for curve_num, curve in enumerate(curves):
            # the step and fit results of this curve; written out at the end of the iteration
            total_results_steps = pd.DataFrame()
            total_results_fit = pd.DataFrame(columns=header_fit)

            if num_curves == 1:
                filename_i = filename
            elif curve_num < num_fw:
                filename_i = filename + '_' + 'fw_curve{num}'.format(num=curve_num + 1)
            else:
                filename_i = filename + '_' + 'rv_curve{num}'.format(num=curve_num + 1 - num_fw)

            Force_Distance = curve[:, :2]
            Force_Distance_um = np.copy(Force_Distance)
            Force_Distance_um[:, 1] = Force_Distance_um[:, 1] / 1000

            # a curve whose distance decreases is a reverse curve; flip it so
            # the analysis always runs from low to high distance
            orientation = "forward"
            if Force_Distance[0, 1] > Force_Distance[-1, 1]:
                orientation = "reverse"
                Force_Distance = np.flipud(Force_Distance)
                Force_Distance_um = np.flipud(Force_Distance_um)

            # Export down sampled and smoothened FD values
            if export_data['export_SMOOTH'] == 1:
                save_to = analysis_folder + "/" + filename_i + "_smooth_" + timestamp + ".csv"
                np.savetxt(save_to, Force_Distance_um, delimiter=",")

            # trim data below specified force thresholds
            F_trimmed, PD_trimmed, F_low = trim_data(Force_Distance, input_settings['F_min'])

            if F_trimmed.size == 0:
                below_threshold = 'This curve was below the Force threshold and could not be processed!\nPlease check if the correct trap was selected.'
                print(below_threshold)
                output_q.put(below_threshold)
                continue

            # create force and distance derivation of the pre-processed data to be able to identify steps
            derivation_array = create_derivation(input_settings, Frequency_value, F_trimmed, PD_trimmed, F_low)

            filename_results = analysis_folder + "/" + filename_i + "_results_" + timestamp + ".csv"
            search_args = (input_settings, filename_i, Force_Distance, derivation_array, orientation)

            # find steps based on force derivation
            results_F_list, PD_start_F = _search_steps(
                find_steps_F,
                'Force',
                '\nSteps found by force derivation:\n',
                export_steps,
                filename_i,
                filename_results,
                search_args
            )

            # find steps based on distance derivation
            results_PD_list, PD_start_PD = _search_steps(
                find_steps_PD,
                'Distance',
                '\nSteps found by distance derivation:\n',
                export_steps,
                filename_i,
                filename_results,
                search_args
            )

            # save plot with FD-curve, derivations and found steps
            save_figure(
                export_data['export_PLOT'],
                timestamp,
                filename_i,
                analysis_folder,
                Force_Distance,
                derivation_array,
                F_trimmed,
                PD_trimmed,
                PD_start_F,
                PD_start_PD
            )

            # to match with the fitting rows (always one more than steps) put a 'step 0' as first line
            step_zero = {'filename': filename_i, 'orientation': orientation, 'Derivation of': '', 'step #': 0, 'F1': '', 'F2': '', 'Fc': '', 'step start': '', 'step end': '', 'step length': ''}

            # when steps are found by force AND distance derivation, they are considered common steps
            common_steps = []
            try:
                common_steps = find_common_steps(results_F_list, results_PD_list)
            except Exception:
                err_FCS = str("Error in finding common steps" + str(filename_i) + '\n' 'There was an error in finding common steps')
                output_q.put(err_FCS)

            if common_steps:
                # append common steps to the 'step 0' and export them for ONLY this file
                common_steps_results = pd.DataFrame([step_zero] + list(common_steps))
                with open(filename_results, 'a+') as f:
                    f.write('\nCommon steps:\n')
                common_steps_results.to_csv(filename_results, mode='a', index=False, header=True)
                total_results_steps = _append_rows(total_results_steps, common_steps_results)
            else:
                total_results_steps = _append_rows(total_results_steps, [step_zero])

            # placeholder row for a segment that could not be fitted
            empty = {
                'filename': filename_i,
                'model': 'None',
                'log_likelihood': 'None',
                'Lc_ds': 'None',
                'Lp_ds': 'None',
                'Lp_ds_stderr': 'None',
                'St_ds': 'None',
                'f_offset': 'None',
                'd_offset': 'None'
            }

            if export_data['export_FIT'] == 1:
                try:
                    export_fit = []
                    fit = []
                    start_force_ss = []
                    start_distance_ss = []
                    integral_ss_fit_start = []
                    integral_ss_fit_end = []

                    # try to fit all parts of curve based on the common steps
                    try:
                        # fit part between start of the FD-curve up to the first common step
                        export_fit_ds, area_ds = fitting_ds(
                            filename_i,
                            input_settings,
                            export_data,
                            input_fitting,
                            float(common_steps[0]['step start']),
                            Force_Distance,
                            derivation_array,
                            F_low,
                            0
                        )
                        export_fit.append(export_fit_ds)

                        # fit parts after steps, when more than one common step was found, there are multiple parts to fit
                        if len(common_steps) > 1:
                            for n in range(0, len(common_steps) - 1):
                                # if one of the parts can not be fitted, still try to fit the others
                                try:
                                    fit_ss, f_fitting_region_ss, d_fitting_region_ss, export_fit_ss, area_ss_fit_start, area_ss_fit_end = fitting_ss(
                                        filename_i,
                                        input_settings,
                                        export_data,
                                        input_fitting,
                                        float(common_steps[n]['step end']),
                                        float(common_steps[n + 1]['step start']),
                                        Force_Distance, 1, 1,
                                        derivation_array,
                                        F_low
                                    )

                                    fit.append(fit_ss)
                                    start_force_ss.append(f_fitting_region_ss)
                                    start_distance_ss.append(d_fitting_region_ss)
                                    export_fit.append(export_fit_ss)
                                    integral_ss_fit_start.append(area_ss_fit_start)
                                    integral_ss_fit_end.append(area_ss_fit_end)

                                except Exception:
                                    # append a copy: the rows are modified individually below
                                    export_fit.append(empty.copy())

                        # fit the last part of the curve
                        try:
                            fit_ss, f_fitting_region_ss, d_fitting_region_ss, export_fit_ss, area_ss_fit_start, area_ss_fit_end = fitting_ss(
                                filename_i,
                                input_settings,
                                export_data,
                                input_fitting,
                                float(common_steps[len(common_steps) - 1]['step end']),
                                max(derivation_array[:, 1]),
                                Force_Distance, 1, 1,
                                derivation_array,
                                F_low
                            )

                            fit.append(fit_ss)
                            start_force_ss.append(f_fitting_region_ss)
                            start_distance_ss.append(d_fitting_region_ss)
                            export_fit.append(export_fit_ss)
                            integral_ss_fit_start.append(area_ss_fit_start)
                            integral_ss_fit_end.append(area_ss_fit_end)

                        except Exception:
                            export_fit.append(empty.copy())

                        # from the fits, work put into the system is calculated
                        if common_steps:
                            work_per_step = [0]  # in pN*nm
                            kT_per_step = [0]  # in kT

                            work_first_step, kT_1 = calc_integral(
                                area_ds,
                                integral_ss_fit_start[0],
                                common_steps[0]['step start'],
                                common_steps[0]['step end'],
                                common_steps[0]['F1'],
                                common_steps[0]['F2']
                            )

                            print("Work of first step: " + str(work_first_step))
                            work_per_step.append(work_first_step)
                            kT_per_step.append(kT_1)

                            if len(common_steps) > 1:
                                for n in range(0, len(common_steps) - 1):
                                    work_step_n, kT_n = calc_integral(
                                        integral_ss_fit_end[n],
                                        integral_ss_fit_start[n + 1],
                                        common_steps[n + 1]['step start'],
                                        common_steps[n + 1]['step end'],
                                        common_steps[n + 1]['F1'],
                                        common_steps[n + 1]['F2']
                                    )

                                    work_per_step.append(work_step_n)
                                    kT_per_step.append(kT_n)

                            for row_num, fit_row in enumerate(export_fit):
                                fit_row["Work_(pN*nm)"] = work_per_step[row_num]
                                fit_row["Work_(kB*T)"] = kT_per_step[row_num]

                    # if no step was found, the common step index 0 is not available and will raise an IndexError.
                    # So in this case the fit will be performed for the whole curve from beginning to end.
                    except IndexError:
                        if not common_steps:
                            export_fit_ds, area_ds = fitting_ds(
                                filename_i,
                                input_settings,
                                export_data,
                                input_fitting,
                                derivation_array[-1, 1],
                                Force_Distance,
                                derivation_array,
                                F_low,
                                0
                            )

                            export_fit.append(export_fit_ds)

                    total_results_fit = _append_rows(total_results_fit, export_fit)

                    # create a plot for the fitted curve
                    plot_fit(fit, start_force_ss, start_distance_ss, Force_Distance, analysis_folder, filename_i, timestamp)

                except Exception:
                    print('Something went wrong with fitting')

            # the total results file only exists when its export was requested
            if export_total:
                results_total_total = pd.concat([total_results_steps, total_results_fit], axis=1)
                results_total_total.to_csv(filename_total_results, mode='a', index=False, header=False)

            print('done', curve_num + 1, 'curves from', len(curves))
            out_progress = str('File ' + str(file_num + 1) + ': Done ' + str(curve_num + 1) + ' curves from ' + str(len(curves)))
            output_q.put(out_progress)

            print(filename_i)
            output_q.put(filename_i)

        if file_num == int(len(Files) / 2):
            print('\nHalf way there!\n')
            output_q.put('Half way there!')
            print()
        elif file_num == len(Files) - 1:
            print('\nAlmost there!\n')
            output_q.put('Almost there!')

        files_done = file_num + 1
        print('done', files_done, 'from', len(Files))
        out_progress = str('Done ' + str(files_done) + ' files from ' + str(len(Files)))
        output_q.put(out_progress)

        print(filename_i)
        output_q.put(filename_i)

    print('Analysis finished! \nProgram can be closed.')
    output_q.put('Analysis finished! \nProgram can be closed.')
Neva Caliskan """ @@ -11,14 +12,14 @@ Alternatively they can be changed permanently in the POTATO_config file""" import tkinter as tk -from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg +from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk from matplotlib.figure import Figure from matplotlib.lines import Line2D from tkinter import filedialog from tkinter import ttk -from tkinter import messagebox from PIL import ImageTk, Image import pandas as pd +import numpy as np import os import h5py import glob @@ -27,10 +28,12 @@ import json # relative imports -from POTATO_ForceRamp import start_subprocess +from POTATO_ForceRamp import start_subprocess, read_in_data from POTATO_preprocessing import preprocess_RAW from POTATO_config import default_values_HF, default_values_LF, default_values_CSV, default_values_FIT, default_values_constantF from POTATO_constantF import get_constantF, display_constantF, fit_constantF +from POTATO_fitting import fitting_ds + # To avoid blurry GUI - DPI scaling import ctypes @@ -46,10 +49,10 @@ def start_analysis(): global p0 global analysis_folder - global image_number + # check user input - input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() - image_number = [] + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() + # ask wich directory should be analysed folder = filedialog.askdirectory() root.title('POTATO -- ' + str(folder)) @@ -61,9 +64,9 @@ def start_analysis(): folder_path = str(folder + "/*.h5") Files = glob.glob(folder_path) - print('files to analyse', len(Files)) # print number of files to analyse, if no files found give an error + print('Files to analyse', len(Files)) output_window.insert("end", 'Files to analyse: ' + str(len(Files)) + "\n") output_window.see("end") if not len(Files) == 0: @@ -120,10 +123,14 @@ def parameters(frame, default_values, default_fit, default_constantF): 
Force_Min1.insert("end", default_values['Force threshold, pN']) Force_Min2.delete(0, "end") Force_Min2.insert("end", default_values['Force threshold, pN']) - STD1_threshold1.delete(0, "end") - STD1_threshold1.insert("end", default_values['Z-score']) - STD1_threshold2.delete(0, "end") - STD1_threshold2.insert("end", default_values['Z-score']) + Z_score_force1.delete(0, "end") + Z_score_force1.insert("end", default_values['Z-score force']) + Z_score_force2.delete(0, "end") + Z_score_force2.insert("end", default_values['Z-score force']) + Z_score_distance1.delete(0, "end") + Z_score_distance1.insert("end", default_values['Z-score distance']) + Z_score_distance2.delete(0, "end") + Z_score_distance2.insert("end", default_values['Z-score distance']) step_d_value.delete(0, "end") step_d_value.insert("end", str(default_values['Step d'])) window_size_value.delete(0, "end") @@ -201,7 +208,8 @@ def check_settings(): 'filter_cut_off': float(Filter_cut_off2.get()), 'F_min': float(Force_Min2.get()), 'step_d': int(step_d_value.get()), - 'z-score': float(STD1_threshold2.get()), + 'z-score_f': float(Z_score_force2.get()), + 'z-score_d': float(Z_score_distance2.get()), 'window_size': int(window_size_value.get()), 'data_frequency': float(Frequency_value.get()), 'STD_diff': float(STD_difference_value.get()) @@ -210,7 +218,11 @@ def check_settings(): input_format = { 'HF': check_box_HF.get(), 'LF': check_box_LF.get(), - 'CSV': check_box_CSV.get() + 'CSV': check_box_CSV.get(), + 'Trap': check_box_Trap1.get(), + 'length_measure': check_box_um.get(), + 'MultiH5': check_box_multiH5.get(), + 'preprocess': check_box_preprocess.get() } export_data = { @@ -242,6 +254,27 @@ def check_settings(): 'offset_d_low': float(d_off_low.get()) } + TOMATO_fitting = { + 'WLC+WLC': int(check_box_WLC.get()), + 'WLC+FJC': int(check_box_FJC.get()), + 'lp_ds': float(entryText_ds_Lp.get()), + 'lp_ds_up': float(dsLp_up.get()), + 'lp_ds_low': float(dsLp_low.get()), + 'lc_ds': float(entryText_ds_Lc.get()), + 
'lp_ss': float(entryText_ss_Lp.get()), + 'lc_ss': float(entryText_ss_Lc.get()), + 'ss_stiff': float(entryText_ss_St.get()), + 'offset_f': float(entryText_shift_F.get()), + 'offset_f_up': float(f_off_up.get()), + 'offset_f_low': float(f_off_low.get()), + 'offset_d': float(entryText_shift_d.get()), + 'offset_d_up': float(d_off_up.get()), + 'offset_d_low': float(d_off_low.get()), + 'ds_stiff': float(entryText_ds_St.get()), + 'ds_stiff_up': float(entryText_ds_St.get()) + float(stiff_ds_up.get()), + 'ds_stiff_low': float(entryText_ds_St.get()) - float(stiff_ds_low.get()) + } + input_constantF = { 'x min': int(x_min.get()), 'x max': int(x_max.get()), @@ -253,7 +286,7 @@ def check_settings(): 'Amplitude': amplitude_gauss.get() } - return input_settings, input_format, export_data, input_fitting, input_constantF + return input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF def export_settings(analysis_path, timestamp, input_1, input_2): @@ -295,23 +328,35 @@ def readme(): # display a single h5 file (tab2) def getRAW_File_h5(): - input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() import_file_path = filedialog.askopenfilename() - with h5py.File(import_file_path, "r") as raw_data: # access the raw data if input_format['HF'] == 1: - Force_1x = raw_data.get("Force HF/Force 1x") - Distance_1x = raw_data.get("Distance/Piezo Distance") + if input_format['Trap'] == 1: + Force = raw_data.get("Force HF/Force 1x") + elif input_format['Trap'] == 0: + Force = raw_data.get("Force HF/Force 2x") + Distance = raw_data.get("Distance/Piezo Distance") elif input_format['LF'] == 1: - load_force = raw_data.get("Force LF/Force 1x") - Force_1x = load_force[:]['Value'][:] - load_distance = raw_data.get("Distance/Distance 1")[:] - Distance_1x = load_distance['Value'][:] - print(Force_1x, Distance_1x) - FD, FD_um = 
preprocess_RAW(Force_1x, Distance_1x, input_settings) - display_RAW_FD(FD[:, 0], FD[:, 1], Force_1x[::input_settings['downsample_value']], Distance_1x[::input_settings['downsample_value']] * 1000) + if input_format['Trap'] == 1: + load_force = raw_data.get("Force LF/Force 1x") + Force = load_force[:]['Value'][:] + load_distance = raw_data.get("Distance/Distance 1x")[:] + Distance = load_distance['Value'][:] + elif input_format['Trap'] == 0: + load_force = raw_data.get("Force LF/Force 2x") + Force = load_force[:]['Value'][:] + load_distance = raw_data.get("Distance/Distance 2x")[:] + Distance = load_distance['Value'][:] + if input_format['preprocess'] == 1: + FD, FD_um = preprocess_RAW(Force, Distance, input_settings) + display_RAW_FD(FD[:, 0], FD[:, 1], Force[::input_settings['downsample_value']], Distance[::input_settings['downsample_value']] * 1000) + else: + Force = np.array(Force) + Distance = np.array(Distance) + display_RAW_FD(Force, Distance, Force, Distance) # display a single csv file (tab2) @@ -324,16 +369,19 @@ def getRAW_File_csv(): else: pass - input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() import_file_path = filedialog.askopenfilename() df = pd.read_csv(import_file_path) - Force_HF_1x = df.to_numpy()[:, 0] - Distance_HF_1x = df.to_numpy()[:, 1] + Force = df.to_numpy()[:, 0] + Distance = df.to_numpy()[:, 1] - FD, FD_um = preprocess_RAW(Force_HF_1x, Distance_HF_1x, input_settings) - display_RAW_FD(FD[:, 0], FD[:, 1], Force_HF_1x[::input_settings['downsample_value']], Distance_HF_1x[::input_settings['downsample_value']] * 1000) + if input_format['preprocess'] == 1: + FD, FD_um = preprocess_RAW(Force, Distance, input_settings) + display_RAW_FD(FD[:, 0], FD[:, 1], Force[::input_settings['downsample_value']], Distance[::input_settings['downsample_value']] * 1000) + else: + display_RAW_FD(Force, 
Distance, Force, Distance) # create the plot for tab2 @@ -359,7 +407,7 @@ def display_RAW_FD(processed_F, processed_D, raw_F, raw_D): def start_constantF(): - input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() Force_Distance, Force_Distance_um, frequency, filename, analysis_path, timestamp = get_constantF(input_settings, input_format, input_constantF) fig_constantF, hist_D, filteredDistance_ready = display_constantF(Force_Distance, Force_Distance_um, frequency, input_settings, input_constantF) os.mkdir(analysis_path) @@ -372,7 +420,7 @@ def start_constantF(): def show_constantF(): - input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() Force_Distance, Force_Distance_um, frequency, filename, analysis_path, timestamp = get_constantF(input_settings, input_format, input_constantF) fig_constantF, hist_D, filteredDistance_ready = display_constantF(Force_Distance, Force_Distance_um, frequency, input_settings, input_constantF) fig_constantF_tk = FigureCanvasTkAgg(fig_constantF, figure_frame_tab4) @@ -383,10 +431,672 @@ def show_constantF(): def on_closing(): # makes sure all python processes/loops are cancelled before exiting - if messagebox.askokcancel("Quit", "Do you really want to quit?"): + if tk.messagebox.askokcancel("Quit", "Do you really want to quit?"): root.quit() +################ TOMATO ############################### +# from POTATO_TOMATO import open_folder, create_chart, clear_charts, start_click, \ +# end_click, Fitting_WLC_ds_handles, Fitting_WLC_ss_handles, export_table, clear_table_last, \ +# clear_table, reset_parameters, start_work_click, end_work_click, calc_rWork, calc_strWork, next_FD, \ +# previous_FD_key, next_FD_key, save_key, start_click_key, 
end_click_key, \ +# end_work_click_key, zero_str_work_key, fit_ds_key, fit_ss_key, calc_rWork_key, calc_strWork_key, start_work_click_key, \ +# load_previous_data_key, previous_FD, write_to_table, export_model, zero_str_work +import lumicks.pylake as lk +from scipy.integrate import simps +from POTATO_TOMATO import plot_TOMATO + + +############# define the functions for TOMATO ################## +def open_folder(): + global filename_TOMATO + global Force_Distance_TOMATO + global import_file_path + global TOMATO_fig1 + global Files + global FD_number + # check user input + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() + + # ask wich directory should be analysed + folder = filedialog.askdirectory() + root.title('POTATO -- ' + str(folder)) + + # decide which input format was choosen + if input_format['CSV'] == 1: + folder_path = str(folder + "/*.csv") + else: + folder_path = str(folder + "/*.h5") + + Files = glob.glob(folder_path) + + FD_number = 0 + Force_Distance_TOMATO, Force_Distance_um_TOMATO, Frequency_value, filename_TOMATO = read_in_data(FD_number, Files, input_settings, input_format) + entryText_filename.set(filename_TOMATO) + + reset_parameters() + fig = plot_TOMATO(Force_Distance_TOMATO) + TOMATO_fig1 = FigureCanvasTkAgg(fig, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0, sticky='wens') + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + +def change_FD(direction): + global TOMATO_fig1 + global filename_TOMATO + global FD_number + global Force_Distance_TOMATO + FD_number = FD_number + direction + + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() + Force_Distance_TOMATO, Force_Distance_um_TOMATO, Frequency_value, filename_TOMATO = read_in_data(FD_number, Files, input_settings, input_format) + + if Force_Distance_TOMATO[0, 1] 
> Force_Distance_TOMATO[-1, 1]: # reverse + Force_Distance_TOMATO = np.flipud(Force_Distance_TOMATO) + Force_Distance_um_TOMATO = np.flipud(Force_Distance_um_TOMATO) + + entryText_filename.set(filename_TOMATO) + + reset_parameters() + fig = plot_TOMATO(Force_Distance_TOMATO) + TOMATO_fig1 = FigureCanvasTkAgg(fig, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0, sticky='wens') + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + +def save_previous_data(): + global TOMATO_dict + TOMATO_dict = {'shift_d': entryText_shift_d.get(), + 'shift_F': entryText_shift_F.get(), + 'dsWork': entryText_dsWork.get(), + 'ds_St': entryText_ds_St.get(), + 'ss_St': entryText_ss_St.get(), + 'ssWork': entryText_ssWork.get(), + 'ds_Lp': entryText_ds_Lp.get(), + 'ss_Lp': entryText_ss_Lp.get(), + 'rWork': entryText_rWork.get(), + 'ds_Lc': entryText_ds_Lc.get(), + 'ss_Lc': entryText_ss_Lc.get(), + 'strWork': entryText_strWork.get(), + 'fit_end': entryText_end.get(), + 'fit_start': entryText_start.get(), + 'start_work_D': entryText_start_work_D.get(), + 'start_work_F': entryText_start_work_F.get(), + 'end_work_D': entryText_end_work_D.get(), + 'end_work_F': entryText_end_work_F.get(), + } + + + try: + TOMATO_dict['FD plot'] = Force_Distance_TOMATO + except: + pass + + try: + TOMATO_dict['ds_fitted_region'] = [Force_Distance_TOMATO[:, 1][real_start_2: real_end_2], Force_Distance_TOMATO[:, 0][real_start_2: real_end_2]] + except: + pass + + try: + TOMATO_dict['ss_fitted_region'] = [Force_Distance_TOMATO[:, 1][real_start_3:real_end_3], Force_Distance_TOMATO[:, 0][real_start_3:real_end_3]] + except: + pass + + try: + TOMATO_dict['ds_fit_plot'] = [distance, F_ds_model] + except: + pass + + try: + TOMATO_dict['ss_fit_plot'] = [distance, F_ss_model] + except: + pass + + try: + TOMATO_dict['result_table'] = listBox + except: + pass + + +def load_previous_data(): + 
entryText_shift_d.set(TOMATO_dict['shift_d']) + entryText_shift_F.set(TOMATO_dict['shift_F']) + entryText_dsWork.set(TOMATO_dict['dsWork']) + entryText_ds_St.set(TOMATO_dict['ds_St']) + entryText_ss_St.set(TOMATO_dict['ss_St']) + entryText_ssWork.set(TOMATO_dict['ssWork']) + entryText_ds_Lp.set(TOMATO_dict['ds_Lp']) + entryText_ss_Lp.set(TOMATO_dict['ss_Lp']) + entryText_rWork.set(TOMATO_dict['rWork']) + entryText_ds_Lc.set(TOMATO_dict['ds_Lc']) + entryText_ss_Lc.set(TOMATO_dict['ss_Lc']) + entryText_strWork.set(TOMATO_dict['strWork']) + entryText_end.set(TOMATO_dict['fit_end']) + entryText_start.set(TOMATO_dict['fit_start']) + entryText_start_work_D.set(TOMATO_dict['start_work_D']) + entryText_start_work_F.set(TOMATO_dict['start_work_F']) + entryText_end_work_D.set(TOMATO_dict['end_work_D']) + entryText_end_work_F.set(TOMATO_dict['end_work_F']) + + global TOMATO_fig1 + fig = plot_TOMATO(np.array([TOMATO_dict['FD plot'][1], TOMATO_dict['FD plot'][0]])) + + TOMATO_fig1 = FigureCanvasTkAgg(fig, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0, sticky='wens') + + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + # ds fitted region + try: + subplot1.plot(TOMATO_dict['ds_fitted_region'][0], TOMATO_dict['ds_fitted_region'][1], color="b") + except: + pass + + # ss fitted region + try: + subplot1.plot(TOMATO_dict['ss_fitted_region'][0], TOMATO_dict['ss_fitted_region'][1], color="r") + except: + pass + + # ds fit plot + try: + subplot1.plot(TOMATO_dict['ds_fit_plot'][0], TOMATO_dict['ds_fit_plot'][1], marker=None,linestyle='dashed',linewidth=1,color="black") + except: + pass + + # ss fit plot + try: + subplot1.plot(TOMATO_dict['ss_fit_plot'][0], TOMATO_dict['ss_fit_plot'][1], marker=None, linestyle='dashed', linewidth=1, color="black") + except: + pass + + +# key binding wrapper functions +def previous_FD_key(event): + change_FD(-1) + + +def next_FD_key(event): + 
change_FD(+1) + + +def save_key(event): + write_to_table() + + +def start_click_key(event): + start_click() + + +def end_click_key(event): + end_click() + + +def start_work_click_key(event): + start_work_click() + + +def end_work_click_key(event): + end_work_click() + + +def zero_str_work_key(event): + zero_str_work() + + +def fit_ds_key(event): + Fitting_WLC_ds_handles() + + +def fit_ss_key(event): + Fitting_WLC_ss_handles() + + +def load_previous_data_key(event): + load_previous_data() + + +def calc_rWork_key(event): + calc_rWork() + + +def calc_strWork_key(event): + calc_strWork() + + +def create_chart(): + global TOMATO_fig1 + + fig = plot_TOMATO(Force_Distance_TOMATO) + TOMATO_fig1 = FigureCanvasTkAgg(fig, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0, sticky='wens') + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + +def start_click(): + global cid + cid = TOMATO_fig1.mpl_connect('button_press_event', lambda event, arg=1: onclick_start_end(event, arg)) + + +def end_click(): + global cid + cid = TOMATO_fig1.mpl_connect('button_press_event', lambda event, arg=0: onclick_start_end(event, arg)) + + +def onclick_start_end(event, pos): + global cid + + PD_position, F_position = float(event.xdata), float(event.ydata) + print(PD_position, F_position) + if pos == 1: + entryText_start.set(round(PD_position, 1)) + elif pos == 0: + entryText_end.set(round(PD_position, 1)) + TOMATO_fig1.mpl_disconnect(cid) + + +def start_work_click(): + global cid + cid = TOMATO_fig1.mpl_connect('button_press_event', lambda event, arg=1: onclick_work(event, arg)) + + +def end_work_click(): + global cid + cid = TOMATO_fig1.mpl_connect('button_press_event', lambda event, arg=0: onclick_work(event, arg)) + + +def onclick_work(event, pos): + global cid + + PD_position, F_position = float(event.xdata), float(event.ydata) + print(PD_position, F_position) + if pos == 1: + 
entryText_start_work_D.set(round(PD_position, 1)) + entryText_start_work_F.set(round(F_position, 3)) + elif pos == 0: + entryText_end_work_D.set(round(PD_position, 1)) + entryText_end_work_F.set(round(F_position, 3)) + TOMATO_fig1.mpl_disconnect(cid) + + +def calc_rWork(): + x1 = float(entryText_start_work_D.get()) + x2 = float(entryText_end_work_D.get()) + + y1 = float(entryText_start_work_F.get()) + y2 = float(entryText_end_work_F.get()) + + rWork = (x2 - x1) * (y1 + y2) / 2 / 4.114 + + entryText_rWork.set(rWork) + entryText_strWork.set("0") + + +def calc_strWork(): + dsWork = float(entryText_dsWork.get()) + rWork = float(entryText_rWork.get()) + ssWork = float(entryText_ssWork.get()) + + strWork = dsWork + rWork - ssWork + entryText_strWork.set(strWork) + + +def write_to_table(): + global listBox + + if float(entryText_strWork.get()) == 0: + work_done = entryText_rWork.get() + else: + work_done = entryText_strWork.get() + listBox.insert("", "end", values=(filename_TOMATO, entryText_start_work_F.get(), entryText_end_work_F.get(), (float(entryText_start_work_F.get()) + float(entryText_end_work_F.get())) / 2, entryText_start_work_D.get(), entryText_end_work_D.get(), float(entryText_end_work_D.get()) - float(entryText_start_work_D.get()), entry_ds_Lc.get(), entry_ds_Lp.get(), entry_ds_St.get(), entry_ss_Lc.get(), entry_ss_Lp.get(), entry_ss_St.get(), entry_shift_F.get(), entry_shift_d.get(), work_done)) + + +def clear_charts(): + TOMATO_fig1.get_tk_widget().grid_forget() + + +def clear_table(): + global listBox + list_items = listBox.get_children("") + + for item in list_items: + listBox.delete(item) + + +def clear_table_last(): + global listBox + list_items = listBox.get_children("") + + listBox.delete(list_items[-1]) + + +def reset_parameters(): + entryText_shift_d.set("0") + entryText_shift_F.set("0") + entryText_ds_Lp.set("40") + entryText_ds_Lc.set("1256") + entryText_ss_Lc.set("0") + entryText_ss_Lp.set("1") + entryText_ss_St.set("800") + 
entryText_ds_St.set("400") + entryText_dsWork.set("0") + entryText_ssWork.set("0") + entryText_rWork.set("0") + entryText_strWork.set("0") + + +def fitting_ss_TOMATO(PD_ss, F_ss, Ds_fit_dict, fix, max_range): + + model_ss = lk.odijk("DNA_2") + lk.odijk("RNA") + + model_ss = model_ss.invert().subtract_independent_offset() + lk.force_offset("DNA") + fit_ss = lk.FdFit(model_ss) + + fit_ss.add_data("ss_part", F_ss, PD_ss) + + ## ds part parameters + + # Persistance length bounds + # Lp_ds_range=fit_ds["DNA/Lp"].value/10 + fit_ss["DNA_2/Lp"].value = Ds_fit_dict['Lp_ds'] + fit_ss["DNA_2/Lp"].lower_bound = Ds_fit_dict['Lp_ds'] * (1 - max_range / 100) + fit_ss["DNA_2/Lp"].upper_bound = Ds_fit_dict['Lp_ds'] * (1 + max_range / 100) + # if fix==1: + fit_ss["DNA_2/Lp"].fixed = 'True' + fit_ss["DNA/f_offset"].upper_bound = float(f_off_up.get()) + fit_ss["DNA/f_offset"].lower_bound = float(f_off_low.get()) + fit_ss["DNA/f_offset"].value = Ds_fit_dict['f_offset'] + fit_ss["DNA/f_offset"].fixed = 'True' + + fit_ss["inv(DNA_2_with_RNA)/d_offset"].value = Ds_fit_dict['d_offset'] + fit_ss["inv(DNA_2_with_RNA)/d_offset"].fixed = 'True' + + # contour length + # Lc_ds_range=Lc_initial_guess/100 # nm + fit_ss["DNA_2/Lc"].upper_bound = Ds_fit_dict['Lc_ds'] * (1 + max_range / 100) + fit_ss["DNA_2/Lc"].lower_bound = Ds_fit_dict['Lc_ds'] * (1 - max_range / 100) + fit_ss["DNA_2/Lc"].value = Ds_fit_dict['Lc_ds'] + fit_ss["DNA_2/Lc"].unit = 'nm' + # if fix==1: + fit_ss["DNA_2/Lc"].fixed = 'True' + + # stifness + + fit_ss["DNA_2/St"].upper_bound = Ds_fit_dict['St_ds'] * (1 + max_range / 100) + fit_ss["DNA_2/St"].lower_bound = Ds_fit_dict['St_ds'] * (1 - max_range / 100) + fit_ss["DNA_2/St"].value = Ds_fit_dict['St_ds'] + if fix == 1: + fit_ss["DNA_2/St"].fixed = 'True' + + ## ss part parameters + # Persistance length bounds + + fit_ss["RNA/Lp"].value = float(entryText_ss_Lp.get()) + fit_ss["RNA/Lp"].lower_bound = 0.8 + fit_ss["RNA/Lp"].upper_bound = 2 + if fix == 1: + fit_ss["RNA/Lp"].fixed = 
'True' + + # stiffnes + fit_ss["RNA/St"].value = float(entryText_ss_St.get()) + fit_ss["RNA/St"].lower_bound = 300 + fit_ss["RNA/St"].upper_bound = 1500 + # contour length + + fit_ss["RNA/Lc"].upper_bound = float(entryText_ss_Lc.get()) + 100 + fit_ss["RNA/Lc"].lower_bound = 0 + fit_ss["RNA/Lc"].value = float(entryText_ss_Lc.get()) + fit_ss["RNA/Lc"].unit = 'nm' + + fit_ss.fit() + + Fit_dict = {'model': model_ss, 'fit_model': fit_ss, 'Lc_ds': fit_ss["DNA_2/Lc"].value, 'Lp_ds': fit_ss["DNA_2/Lp"].value, 'St_ds': fit_ss["DNA_2/St"].value, 'Lc_ss': fit_ss["RNA/Lc"].value, 'Lp_ss': fit_ss["RNA/Lp"].value, 'St_ss': fit_ss["RNA/St"].value, 'f_offset': fit_ss["DNA/f_offset"].value, 'd_offset': fit_ss["inv(DNA_2_with_RNA)/d_offset"].value} + return Fit_dict + + +def Fitting_WLC_ds_handles(): + # create a sublist of the ROI PD_nm + global ds_fit_dict_TOMATO + global F_region + global F_ds_model + global distance + global real_start, real_end + global real_start_2, real_end_2 + # find match with PD + save_previous_data() + input_settings, input_format, export_data, input_fitting, TOMATO_fitting, input_constantF = check_settings() + + real_PD = [] + start_PD = float(entry_start.get()) + end_PD = float(entry_end.get()) + + for i in [start_PD, end_PD]: + absolute_difference_function = lambda cPD: abs(cPD - i) + real_PD.append(min(Force_Distance_TOMATO[:, 1], key=absolute_difference_function)) + + PD_nm_list = list(Force_Distance_TOMATO[:, 1]) + + real_start = PD_nm_list.index(real_PD[0]) + real_end = PD_nm_list.index(real_PD[1]) + + PD_region = [] + F_region = [] + if real_start < real_end: + for i in range(real_start, real_end, 10): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + + else: + for i in range(real_end, real_start, 10): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + + Force_Distance_ds_fit = np.array([F_region, PD_region]) + ds_fit_dict_TOMATO, 
area_ds_TOMATO = fitting_ds(filename_TOMATO, input_settings, export_data, TOMATO_fitting, real_end, Force_Distance_ds_fit, None, None, 1) + + entryText_ds_Lp.set(ds_fit_dict_TOMATO['Lp_ds']) + entryText_shift_F.set(ds_fit_dict_TOMATO['f_offset']) + entryText_shift_d.set(ds_fit_dict_TOMATO["d_offset"]) + entryText_ds_Lc.set(ds_fit_dict_TOMATO['Lc_ds']) + entryText_ds_St.set(ds_fit_dict_TOMATO['St_ds']) + + # plot the marked region and fitted WLC + global TOMATO_fig1 + global figure1 + global subplot1 + # model data + distance = np.arange(min(Force_Distance_TOMATO[:, 1]), max(Force_Distance_TOMATO[:, 1]) + 50, 2) + F_ds_model = ds_fit_dict_TOMATO['model_ds'](distance, ds_fit_dict_TOMATO['fit_model']) + + figure1 = plot_TOMATO(Force_Distance_TOMATO) + + if real_start < real_end: + real_start_2 = real_start + real_end_2 = real_end + else: + real_start_2 = real_end + real_end_2 = real_start + + subplot1 = figure1.add_subplot(111) + subplot1.plot(Force_Distance_TOMATO[:, 1][real_start_2: real_end_2], Force_Distance_TOMATO[:, 0][real_start_2:real_end_2], color="b") + subplot1.plot(distance, F_ds_model, marker=None, linestyle='dashed', linewidth=1, color="black") + subplot1.set_ylim([min(Force_Distance_TOMATO[:, 0]), max(Force_Distance_TOMATO[:, 0])]) + subplot1.set_xlim([min(Force_Distance_TOMATO[:, 1]) - 10, max(Force_Distance_TOMATO[:, 1]) + 10]) + subplot1.tick_params('both', direction='in') + + TOMATO_fig1 = FigureCanvasTkAgg(figure1, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0) + + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + entryText_dsWork.set(area_ds_TOMATO) + print("area_ds = " + str(area_ds_TOMATO)) + # add the parameters to table + + +## fitting the ss RNA part combined with ds handles part +def Fitting_WLC_ss_handles(): + # create a sublist of the ROI PD_nm + global F_region + global F_ss_model + global distance + global real_start_3, 
real_end_3 + # find match with PD + + save_previous_data() + + real_PD = [] + start_PD = float(entry_start.get()) + end_PD = float(entry_end.get()) + + for i in [start_PD, end_PD]: + absolute_difference_function = lambda cPD: abs(cPD - i) + real_PD.append(min(Force_Distance_TOMATO[:, 1], key=absolute_difference_function)) + # print(real_PD) + + PD_nm_list = list(Force_Distance_TOMATO[:, 1]) + real_start = PD_nm_list.index(real_PD[0]) + real_end = PD_nm_list.index(real_PD[1]) + + PD_region = [] + F_region = [] + if abs(real_start - real_end) > 1000: + if real_start < real_end: + for i in range(real_start, real_end, 1000): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + + else: + for i in range(real_end, real_start, 1000): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + else: + if real_start < real_end: + for i in range(real_start, real_end, 100): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + + else: + for i in range(real_end, real_start, 100): + PD_region.append(Force_Distance_TOMATO[:, 1][i]) + F_region.append(Force_Distance_TOMATO[:, 0][i]) + + #fitting itself + Fit_ss = fitting_ss_TOMATO(PD_region, F_region, ds_fit_dict_TOMATO, 1, 1) + entryText_ds_Lp.set(Fit_ss['Lp_ds']) + entryText_shift_F.set(Fit_ss['f_offset']) + entryText_shift_d.set(Fit_ss["d_offset"]) + entryText_ds_Lc.set(Fit_ss['Lc_ds']) + entryText_ss_Lc.set(Fit_ss['Lc_ss']) + entryText_ss_Lp.set(Fit_ss['Lp_ss']) + entryText_ss_St.set(Fit_ss['St_ss']) + entryText_ds_St.set(Fit_ss['St_ds']) + + # model data + # distance = np.arange(min(PD_nm), max(PD_nm), 1) + F_ss_model = Fit_ss['model'](distance, Fit_ss['fit_model']) + # plot the marked region and fitted WLC + global TOMATO_fig1 + global figure1 + global subplot1 + + if real_start < real_end: + real_start_3 = real_start + real_end_3 = real_end + else: + real_start_3 = real_end + 
real_end_3 = real_start + + subplot1.plot(Force_Distance_TOMATO[:, 1][real_start_3:real_end_3], Force_Distance_TOMATO[:, 0][real_start_3:real_end_3], color="r") + subplot1.plot(distance, F_ss_model, marker=None, linewidth=1, linestyle='dashed', color="black") + + subplot1.set_ylim([min(Force_Distance_TOMATO[:, 0]), max(Force_Distance_TOMATO[:, 0])]) + subplot1.set_xlim([min(Force_Distance_TOMATO[:, 1]) - 10, max(Force_Distance_TOMATO[:, 1]) + 10]) + + TOMATO_fig1 = FigureCanvasTkAgg(figure1, TOMATO_frame) + TOMATO_fig1.get_tk_widget().grid(row=0, column=0) + + toolbarFrame = tk.Frame(master=TOMATO_frame) + toolbarFrame.grid(row=2, column=0) + toolbar = NavigationToolbar2Tk(TOMATO_fig1, toolbarFrame) + + distance_integral = np.arange(float(entryText_start_work_D.get()), float(entryText_end_work_D.get())) + ss_integral = Fit_ss['model'](distance_integral, Fit_ss['fit_model']) + area_ss = simps(ss_integral) / 4.114 + entryText_ssWork.set(area_ss) + # add the parameters to table + + +def export_table(): + global listBox + global name + global Fit_results + ''' exporting the table results ''' + results = [] + for child in listBox.get_children(): + results.append(listBox.item(child)['values']) + + Fit_results = pd.DataFrame(results, + columns=[ + 'Filename', + 'F1', + 'F2', + 'F1/2', + 'Step start', + 'Step end', + 'Step length', + 'ds Contour length', + 'ds Persistance Length', + 'ds St', + 'ss Contour Length', + 'ss Persistance Length', + 'ss St', + 'Shift F', + 'Shift x', + 'Work' + ] + ) + + name = filedialog.asksaveasfile(mode='w', defaultextension=".csv") + print(name) + Fit_results.to_csv(name.name, index=False, header=True) + + +def export_model(): + global listBox + global name + global Fit_results + ''' exporting ds and ss model ''' + try: + F_ss_model + model_data = pd.DataFrame(list(zip(distance, F_ds_model, F_ss_model)), columns=['Distance [nm]', 'Force WLC data [pN]', 'Force WLC+FJC data [pN]']) + except NameError: + model_data = 
pd.DataFrame(list(zip(distance, F_ds_model)), columns=['Distance [nm]', 'Force WLC data [pN]']) + + name = filedialog.asksaveasfile(mode='w', defaultextension=".csv") + name_model = name.name[:-4] + '_model_data.csv' + model_data.to_csv(name_model, index=False, header=True) + + ''' exporting figure ''' + plotname = name.name[:-4] + '_graph.png' + figure1.savefig(plotname, dpi=600) + + +def zero_str_work(): + entryText_strWork.set("0") +############## TOMATO functions end ################### + + """ start the main process and Tkinter application """ if __name__ == '__main__': mp.freeze_support() @@ -428,17 +1138,20 @@ def on_closing(): tab2 = ttk.Frame(tabControl, width=800, height=600) tab3 = ttk.Frame(tabControl, width=800, height=600) tab4 = ttk.Frame(tabControl, width=800, height=600) + tab5 = ttk.Frame(tabControl, width=800, height=600) tab1.grid(row=0, column=0, padx=2, pady=2) tab2.grid(row=0, column=0, padx=2, pady=2) tab3.grid(row=0, column=0, padx=2, pady=2) tab4.grid(row=0, column=0, padx=2, pady=2) + tab5.grid(row=0, column=0, padx=2, pady=2) # ATTENTION - tab3 and tab4 are displayed the other way round in the GUI tabControl.add(tab1, text="Folder Analysis") tabControl.add(tab2, text="Show Single File") tabControl.add(tab4, text="Constant Force Analysis") tabControl.add(tab3, text="Advanced Settings") + tabControl.add(tab5, text="Manual Analysis - TOMATO") """ divide the tab1 into frames """ # output window @@ -472,38 +1185,81 @@ def on_closing(): check_box = tk.Frame(tab1) check_box.grid(row=0, column=1) - def select_box(check_box_1, check_box_2, check_box_3): - if check_box_1.get() == 1: - check_box_2.set(value=0) - check_box_3.set(value=0) - elif check_box_1.get() == 0 and check_box_2.get() == 0 and check_box_3.get() == 0: - check_box_1.set(value=1) + def select_box(*check_box): + for i in check_box: + if i.get() == 1: + for n in check_box: + if not n == i: + n.set(value=0) + boxes = [check_box[x].get() for x in range(len(check_box))] + if all(boxes) 
== 0: + check_box[0].set(value=1) check_box_HF = tk.IntVar(value=1) check_box_LF = tk.IntVar() check_box_CSV = tk.IntVar() - - check1 = tk.Checkbutton( + check_box_Trap1 = tk.IntVar() + check_box_Trap2 = tk.IntVar(value=1) + check_box_um = tk.IntVar(value=1) + check_box_nm = tk.IntVar() + check_box_multiH5 = tk.IntVar() + check_box_preprocess = tk.IntVar(value=1) + + check_HF = tk.Checkbutton( check_box, text="High Frequency (Piezo Distance)", variable=check_box_HF, command=lambda: [select_box(check_box_HF, check_box_LF, check_box_CSV), parameters(parameter_frame, default_values_HF, default_values_FIT, default_values_constantF)] ).grid(row=0, column=0, sticky='W') - check2 = tk.Checkbutton( + check_LF = tk.Checkbutton( check_box, text="Low Frequency", variable=check_box_LF, command=lambda: [select_box(check_box_LF, check_box_HF, check_box_CSV), parameters(parameter_frame, default_values_LF, default_values_FIT, default_values_constantF)] ).grid(row=1, column=0, sticky='W') - check3 = tk.Checkbutton( + check_CSV = tk.Checkbutton( check_box, - text="CSV (F/D)", + text="CSV (F(pN) | d)", variable=check_box_CSV, command=lambda: [select_box(check_box_CSV, check_box_HF, check_box_LF), parameters(parameter_frame, default_values_CSV, default_values_FIT, default_values_constantF)] ).grid(row=2, column=0, sticky='W') + check_Trap1 = tk.Checkbutton( + check_box, + text="Trap 1x", + variable=check_box_Trap1, + command=lambda: select_box(check_box_Trap1, check_box_Trap2) + ).grid(row=0, column=1, padx=8, sticky='W') + + check_Trap2 = tk.Checkbutton( + check_box, + text="Trap 2x", + variable=check_box_Trap2, + command=lambda: select_box(check_box_Trap2, check_box_Trap1) + ).grid(row=1, column=1, padx=8, sticky='W') + + check_um = tk.Checkbutton( + check_box, + text="µm input", + variable=check_box_um, + command=lambda: select_box(check_box_um, check_box_nm) + ).grid(row=2, column=1, padx=8, sticky='W') + + check_nm = tk.Checkbutton( + check_box, + text="nm input", + 
variable=check_box_nm, + command=lambda: select_box(check_box_nm, check_box_um) + ).grid(row=3, column=1, padx=8, sticky='W') + + check_Multi = tk.Checkbutton( + check_box, + text="MultiH5", + variable=check_box_multiH5 + ).grid(row=4, column=0, sticky='W') + figure_frame = tk.Canvas(tab1, height=650, width=1000, borderwidth=1, relief='ridge') figure_frame.grid(row=1, column=0) @@ -512,12 +1268,17 @@ def select_box(check_box_1, check_box_2, check_box_3): """ parameter frame """ Cluster_preprocessing = tk.Label(parameter_frame, text='PREPROCESSING', font='Helvetica 9 bold') + check_preprocess = tk.Checkbutton( + parameter_frame, + variable=check_box_preprocess + ).grid(row=0, column=1, pady=(20, 2), sticky='W') Label_downsample = tk.Label(parameter_frame, text='Downsampling rate') Label_Filter1 = tk.Label(parameter_frame, text='Butterworth filter degree') Label_Filter2 = tk.Label(parameter_frame, text='Cut-off frequency') Label_ForceMin = tk.Label(parameter_frame, text='Force threshold, pN') Cluster_statistics = tk.Label(parameter_frame, text='STATISTICS', font='Helvetica 9 bold') - Label_STD_1 = tk.Label(parameter_frame, text='Z-score') + Label_Zscore_F = tk.Label(parameter_frame, text='Z-score force') + Label_Zscore_D = tk.Label(parameter_frame, text='Z-score distance') downsample_value1 = tk.Entry(parameter_frame) downsample_value1.bind("", lambda event: user_input(event, downsample_value1, downsample_value2)) @@ -531,8 +1292,11 @@ def select_box(check_box_1, check_box_2, check_box_3): Force_Min1 = tk.Entry(parameter_frame) Force_Min1.bind("", lambda event: user_input(event, Force_Min1, Force_Min2)) - STD1_threshold1 = tk.Entry(parameter_frame) - STD1_threshold1.bind("", lambda event: user_input(event, STD1_threshold1, STD1_threshold2)) + Z_score_force1 = tk.Entry(parameter_frame) + Z_score_force1.bind("", lambda event: user_input(event, Z_score_force1, Z_score_force2)) + + Z_score_distance1 = tk.Entry(parameter_frame) + Z_score_distance1.bind("", lambda event: 
user_input(event, Z_score_distance1, Z_score_distance2)) Cluster_preprocessing.grid(row=0, column=0, padx=2, pady=(20, 2)) Label_downsample.grid(row=1, column=0, sticky=tk.E + tk.W, padx=2, pady=2) @@ -548,8 +1312,11 @@ def select_box(check_box_1, check_box_2, check_box_3): Force_Min1.grid(row=4, column=1, padx=2, pady=2) Cluster_statistics.grid(row=5, column=0, padx=2, pady=(20, 2)) - Label_STD_1.grid(row=6, column=0, sticky=tk.E + tk.W, padx=2, pady=2) - STD1_threshold1.grid(row=6, column=1, padx=2, pady=2) + Label_Zscore_F.grid(row=6, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + Z_score_force1.grid(row=6, column=1, padx=2, pady=2) + + Label_Zscore_D.grid(row=7, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + Z_score_distance1.grid(row=7, column=1, padx=2, pady=2) BUTTON1 = tk.Button( parameter_frame, @@ -611,11 +1378,12 @@ def select_box(check_box_1, check_box_2, check_box_3): Label_Filter1 = tk.Label(frame1, text='Butterworth filter degree') Label_Filter2 = tk.Label(frame1, text='Cut-off frequency') Label_ForceMin = tk.Label(frame1, text='Force threshold, pN') - Cluster_derivation = tk.Label(frame1, text="DERIVATION", font='Helvetica 9 bold') + Cluster_derivative = tk.Label(frame1, text="DERIVATIVE", font='Helvetica 9 bold') Label_step_d = tk.Label(frame1, text='Step d') Label_Frequency = tk.Label(frame1, text='Data frequency, Hz') Cluster_statistics = tk.Label(frame1, text='STATISTICS', font='Helvetica 9 bold') - Label_STD_1 = tk.Label(frame1, text='Z-score') + Label_Zscore_F = tk.Label(frame1, text='Z-score force') + Label_Zscore_D = tk.Label(frame1, text='Z-score distance') Label_window_size = tk.Label(frame1, text='Moving median window size') Label_STD_difference = tk.Label(frame1, text='SD difference threshold') @@ -631,8 +1399,11 @@ def select_box(check_box_1, check_box_2, check_box_3): Force_Min2 = tk.Entry(frame1) Force_Min2.bind("", lambda event: user_input(event, Force_Min2, Force_Min1)) - STD1_threshold2 = tk.Entry(frame1) - 
STD1_threshold2.bind("", lambda event: user_input(event, STD1_threshold2, STD1_threshold1)) + Z_score_force2 = tk.Entry(frame1) + Z_score_force2.bind("", lambda event: user_input(event, Z_score_force2, Z_score_force1)) + + Z_score_distance2 = tk.Entry(frame1) + Z_score_distance2.bind("", lambda event: user_input(event, Z_score_distance2, Z_score_distance1)) step_d_value = tk.Entry(frame1) window_size_value = tk.Entry(frame1) @@ -652,7 +1423,7 @@ def select_box(check_box_1, check_box_2, check_box_3): Label_ForceMin.grid(row=4, column=0, padx=2, pady=2) Force_Min2.grid(row=4, column=1, padx=(0, 20), pady=2) - Cluster_derivation.grid(row=5, column=0, padx=2, pady=(20, 2)) + Cluster_derivative.grid(row=5, column=0, padx=2, pady=(20, 2)) Label_step_d.grid(row=6, column=0, padx=2, pady=2) step_d_value.grid(row=6, column=1, padx=(0, 20), pady=2) @@ -660,14 +1431,17 @@ def select_box(check_box_1, check_box_2, check_box_3): Frequency_value.grid(row=7, column=1, padx=(0, 20), pady=2) Cluster_statistics.grid(row=8, column=0, padx=2, pady=(20, 2)) - Label_STD_1.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) - STD1_threshold2.grid(row=9, column=1, padx=(0, 20), pady=2) + Label_Zscore_F.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + Z_score_force2.grid(row=9, column=1, padx=(0, 20), pady=2) + + Label_Zscore_D.grid(row=10, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + Z_score_distance2.grid(row=10, column=1, padx=(0, 20), pady=2) - Label_window_size.grid(row=11, column=0, padx=2, pady=2) - window_size_value.grid(row=11, column=1, padx=(0, 20), pady=2) + Label_window_size.grid(row=12, column=0, padx=2, pady=2) + window_size_value.grid(row=12, column=1, padx=(0, 20), pady=2) - Label_STD_difference.grid(row=12, column=0, padx=2, pady=2) - STD_difference_value.grid(row=12, column=1, padx=(0, 20), pady=2) + Label_STD_difference.grid(row=13, column=0, padx=2, pady=2) + STD_difference_value.grid(row=13, column=1, padx=(0, 20), pady=2) """ Output settings """ 
check_box_smooth_data = tk.IntVar(value=1) @@ -682,31 +1456,31 @@ def select_box(check_box_1, check_box_2, check_box_3): frame2, text="Processed FD data", variable=check_box_smooth_data, - ).grid(row=1, column=0, sticky='W') + ).grid(row=1, column=0, sticky='W') check_2 = tk.Checkbutton( frame2, text="Plot", variable=check_box_plot, - ).grid(row=2, column=0, sticky='W') + ).grid(row=2, column=0, sticky='W') check_3 = tk.Checkbutton( frame2, text="Steps found", variable=check_box_steps, - ).grid(row=3, column=0, sticky='W') + ).grid(row=3, column=0, sticky='W') check_4 = tk.Checkbutton( frame2, text="Total results (All steps from all files)", variable=check_box_total_results, - ).grid(row=4, column=0, sticky='W') + ).grid(row=4, column=0, sticky='W') check_5 = tk.Checkbutton( frame2, text="Fitting", variable=check_box_fitting, - ).grid(row=5, column=0, sticky='W') + ).grid(row=5, column=0, sticky='W') """ Fitting parameters """ Cluster_fitting = tk.Label(frame3, text='FITTING', font='Helvetica 9 bold') @@ -753,14 +1527,14 @@ def select_box(check_box_1, check_box_2, check_box_3): text="WLC+WLC", variable=check_box_WLC, command=lambda: [check_box_WLC.set(value=1), check_box_FJC.set(value=0)] - ).grid(row=1, column=0, sticky='W', pady=20) + ).grid(row=1, column=0, sticky='W', pady=20) check_FJC = tk.Checkbutton( frame3, text="WLC+FJC", variable=check_box_FJC, command=lambda: [check_box_WLC.set(value=0), check_box_FJC.set(value=1)] - ).grid(row=1, column=1, sticky='W', pady=20) + ).grid(row=1, column=1, sticky='W', pady=20) Label_dsLp.grid(row=2, column=0, sticky=tk.E + tk.W, padx=2, pady=2) dsLp.grid(row=2, column=1, padx=(0, 20), pady=2) @@ -898,6 +1672,347 @@ def select_box(check_box_1, check_box_2, check_box_3): Label_amplitude_gauss.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) amplitude_gauss.grid(row=9, column=1, sticky=tk.E + tk.W, padx=2, pady=2) + """organize tab5 ---- TOMATO""" + tab5.columnconfigure([0, 1], weight=1, minsize=75) + 
tab5.rowconfigure(0, weight=1, minsize=50) + + canvas1 = tk.Canvas(tab5, width=650, height=700) + canvas1.grid(row=0, column=1) + + canvas2 = tk.Canvas(tab5, width=650, height=100) + canvas2.grid(row=1, column=1) + + TOMATO_frame = tk.Frame(tab5, width=500, height=700) + TOMATO_frame.grid(row=0, column=0) + + canvas_name = tk.Canvas(tab5, width=400, height=50) + canvas_name.grid(row=1, column=0) + + frame_table = tk.Frame(tab5, width=400, height=100) + frame_table.grid(row=2, column=0) + + label_shift_d = tk.Label(tab5, text='Shift x [nm]') + label_shift_d.config(font=('Arial', 10)) + canvas1.create_window(100, 160, window=label_shift_d) + + entryText_shift_d = tk.StringVar() + entry_shift_d = tk.Entry(tab5, textvariable=entryText_shift_d) + canvas1.create_window(100, 180, window=entry_shift_d) + entryText_shift_d.set("0") + + # K0 for both + # ds + label_ds_St = tk.Label(tab5, text='K0 ds (St)') + label_ds_St.config(font=('Arial', 10)) + canvas1.create_window(100, 200, window=label_ds_St) + + entryText_ds_St = tk.StringVar() + entry_ds_St = tk.Entry(tab5, textvariable=entryText_ds_St) + canvas1.create_window(100, 220, window=entry_ds_St) + entryText_ds_St.set("450") + + # ss + label_ss_St = tk.Label(tab5, text='K0 ss (St)') + label_ss_St.config(font=('Arial', 10)) + canvas1.create_window(300, 200, window=label_ss_St) + + entryText_ss_St = tk.StringVar() + entry_ss_St = tk.Entry(tab5, textvariable=entryText_ss_St) + canvas1.create_window(300, 220, window=entry_ss_St) + entryText_ss_St.set("800") + + # shift in F + label_shift_F = tk.Label(tab5, text='shift F [pN]') + label_shift_F.config(font=('Arial', 10)) + canvas1.create_window(300, 160, window=label_shift_F) + + entryText_shift_F = tk.StringVar() + entry_shift_F = tk.Entry(tab5, textvariable=entryText_shift_F) + canvas1.create_window(300, 180, window=entry_shift_F) + entryText_shift_F.set("0") + + ## ds handle part + # ds handle persistance length + label_ds_Lp = tk.Label(tab5, text='dsHandles Lp [nm]') + 
label_ds_Lp.config(font=('Arial', 10)) + canvas1.create_window(100, 240, window=label_ds_Lp) + + entryText_ds_Lp = tk.StringVar() + entry_ds_Lp = tk.Entry(tab5, textvariable=entryText_ds_Lp) + canvas1.create_window(100, 260, window=entry_ds_Lp) + entryText_ds_Lp.set("40") + + # ds handle contour length + + label_ds_Lc = tk.Label(tab5, text='dsHandles Lc [nm]') + label_ds_Lc.config(font=('Arial', 10)) + canvas1.create_window(100, 290, window=label_ds_Lc) + + entryText_ds_Lc = tk.StringVar() + entry_ds_Lc = tk.Entry(tab5, textvariable=entryText_ds_Lc) + canvas1.create_window(100, 310, window=entry_ds_Lc) + entryText_ds_Lc.set("1256") + + ## ss RNA part + # ss RNA persistance length + label_ss_Lp = tk.Label(tab5, text=' ssRNA Lp [nm]') + label_ss_Lp.config(font=('Arial', 10)) + canvas1.create_window(300, 240, window=label_ss_Lp) + + entryText_ss_Lp = tk.StringVar() + entry_ss_Lp = tk.Entry(tab5, textvariable=entryText_ss_Lp) + canvas1.create_window(300, 260, window=entry_ss_Lp) + entryText_ss_Lp.set("1") + + # ss RNA contour length + label_ss_Lc = tk.Label(tab5, text=' ssRNA Lc [nm]') + label_ss_Lc.config(font=('Arial', 10)) + canvas1.create_window(300, 290, window=label_ss_Lc) + + entryText_ss_Lc = tk.StringVar() + entry_ss_Lc = tk.Entry(tab5, textvariable=entryText_ss_Lc) + canvas1.create_window(300, 310, window=entry_ss_Lc) + entryText_ss_Lc.set("0") + + # work done + # ds work done + label_dsWork = tk.Label(tab5, text='ds Work done [KbT]') + label_dsWork.config(font=('Arial', 10)) + canvas1.create_window(520, 160, window=label_dsWork) + + entryText_dsWork = tk.StringVar() + entry_dsWork = tk.Entry(tab5, textvariable=entryText_dsWork) + canvas1.create_window(520, 180, window=entry_dsWork) + entryText_dsWork.set("0") + + # ss work done + label_ssWork = tk.Label(tab5, text='ss Work done [KbT]') + label_ssWork.config(font=('Arial', 10)) + canvas1.create_window(520, 200, window=label_ssWork) + + entryText_ssWork = tk.StringVar() + entry_ssWork = tk.Entry(tab5, 
textvariable=entryText_ssWork) + canvas1.create_window(520, 220, window=entry_ssWork) + entryText_ssWork.set("0") + + # rectangle work done + label_rWork = tk.Label(tab5, text='rect. Work done [KbT]') + label_rWork.config(font=('Arial', 10)) + canvas1.create_window(520, 240, window=label_rWork) + + entryText_rWork = tk.StringVar() + entry_rWork = tk.Entry(tab5, textvariable=entryText_rWork) + canvas1.create_window(520, 260, window=entry_rWork) + entryText_rWork.set("0") + + # work done by structure + + label_strWork = tk.Label(tab5, text='str. Work done [KbT]') + label_strWork.config(font=('Arial', 10)) + canvas1.create_window(520, 290, window=label_strWork) + + entryText_strWork = tk.StringVar() + entry_strWork = tk.Entry(tab5, textvariable=entryText_strWork) + canvas1.create_window(520, 310, window=entry_strWork) + entryText_strWork.set("0") + + # start position + entryText_start = tk.StringVar() + entry_start = tk.Entry(tab5, textvariable=entryText_start) + canvas1.create_window(250, 350, window=entry_start, width=50, height=20) + + # end position + entryText_end = tk.StringVar() + entry_end = tk.Entry(tab5, textvariable=entryText_end) + canvas1.create_window(250, 390, window=entry_end, width=50, height=20) + + ## Work start + # start position + label_start_work_D = tk.Label(tab5, text='D') + label_start_work_D.config(font=('Arial', 10)) + canvas1.create_window(500, 330, window=label_start_work_D) + + entryText_start_work_D = tk.StringVar() + entry_start_work_D = tk.Entry(tab5, textvariable=entryText_start_work_D) + canvas1.create_window(500, 350, window=entry_start_work_D, width=50, height=20) + entryText_start_work_D.set('0') + + label_start_work_F = tk.Label(tab5, text='F') + label_start_work_F.config(font=('Arial', 10)) + canvas1.create_window(550, 330, window=label_start_work_F) + + entryText_start_work_F = tk.StringVar() + entry_start_work_F = tk.Entry(tab5, textvariable=entryText_start_work_F) + canvas1.create_window(550, 350, window=entry_start_work_F, 
width=50, height=20) + entryText_start_work_F.set('0') + + ## work end + # end positio + label_start_work_D = tk.Label(tab5, text='D') + label_start_work_D.config(font=('Arial', 10)) + canvas1.create_window(500, 390, window=label_start_work_D) + + entryText_end_work_D = tk.StringVar() + entry_end_work_D = tk.Entry(tab5, textvariable=entryText_end_work_D) + canvas1.create_window(500, 390, window=entry_end_work_D, width=50, height=20) + entryText_end_work_D.set('0') + + label_start_work_F = tk.Label(tab5, text='F') + label_start_work_F.config(font=('Arial', 10)) + canvas1.create_window(550, 390, window=label_start_work_F) + + entryText_end_work_F = tk.StringVar() + entry_end_work_F = tk.Entry(tab5, textvariable=entryText_end_work_F) + canvas1.create_window(550, 390, window=entry_end_work_F, width=50, height=20) + entryText_end_work_F.set('0') + + entryText_filename = tk.StringVar() + entry_filename = tk.Entry(tab5, textvariable=entryText_filename) + canvas_name.create_window(200, 30, window=entry_filename, width=800, height=30) + + ## shortcut descriptions + label_shortcut = tk.Label(tab5, text='Keyboard shortcuts:') + label_shortcut.config(font=('Arial', 10)) + canvas1.create_window(100, 650, window=label_shortcut) + + label_shortcut_arrows = tk.Label(tab5, text='Left/right/a/d arrow - switching between curves') + label_shortcut_arrows.config(font=('Arial', 10)) + canvas1.create_window(180, 680, window=label_shortcut_arrows) + + label_shortcut_save = tk.Label(tab5, text='Enter or Shift_L - Save') + label_shortcut_save.config(font=('Arial', 10)) + canvas1.create_window(105, 710, window=label_shortcut_save) + + label_shortcut_start = tk.Label(tab5, text='s - fit start') + label_shortcut_start.config(font=('Arial', 10)) + canvas2.create_window(60, 30, window=label_shortcut_start) + + label_shortcut_end = tk.Label(tab5, text='e - fit end') + label_shortcut_end.config(font=('Arial', 10)) + canvas2.create_window(60, 55, window=label_shortcut_end) + + 
label_shortcut_work_start = tk.Label(tab5, text='space+s - work start') + label_shortcut_work_start.config(font=('Arial', 10)) + canvas2.create_window(290, 30, window=label_shortcut_work_start) + + label_shortcut_work_end = tk.Label(tab5, text='space+e - work end') + label_shortcut_work_end.config(font=('Arial', 10)) + canvas2.create_window(290, 55, window=label_shortcut_work_end) + + label_shortcut_zero = tk.Label(tab5, text='o or ;/° - zero str work') + label_shortcut_zero.config(font=('Arial', 10)) + canvas2.create_window(105, 80, window=label_shortcut_zero) + + label_shortcut_fit_ds = tk.Label(tab5, text='space+f - fit ds') + label_shortcut_fit_ds.config(font=('Arial', 10)) + canvas2.create_window(80, 105, window=label_shortcut_fit_ds) + + label_shortcut_fit_ss = tk.Label(tab5, text='space+g - fit ss') + label_shortcut_fit_ss.config(font=('Arial', 10)) + canvas2.create_window(270, 105, window=label_shortcut_fit_ss) + + label_shortcut_rWork = tk.Label(tab5, text='r - rWork') + label_shortcut_rWork.config(font=('Arial', 10)) + canvas2.create_window(450, 30, window=label_shortcut_rWork) + + label_shortcut_start = tk.Label(tab5, text='t - strWork') + label_shortcut_start.config(font=('Arial', 10)) + canvas2.create_window(455, 55, window=label_shortcut_start) + + ## create button widgets that use the defined functions + browseButton_CSV = tk.Button(tab5, text=" Choose folder ", command=open_folder, bg='green', fg='white', font=('Arial', 11, 'bold')) + canvas1.create_window(200, 50, window=browseButton_CSV) + + button_create = tk.Button(tab5, text=' Create Charts ', command=create_chart, bg='palegreen2', font=('Arial', 11, 'bold')) + canvas1.create_window(200, 90, window=button_create) + + button_clear = tk.Button(tab5, text=' Clear Charts ', command=clear_charts, bg='lightskyblue2', font=('Arial', 11, 'bold')) + canvas1.create_window(200, 130, window=button_clear) + + button_start = tk.Button(tab5, text='Set start', command=start_click, bg='lightsteelblue2', 
font=('Arial', 11, 'bold')) + canvas1.create_window(150, 350, window=button_start) + + button_end = tk.Button(tab5, text='Set end', command=end_click, bg='lightsteelblue2', font=('Arial', 11, 'bold')) + canvas1.create_window(150, 390, window=button_end) + + button_fit_Lp_shift_x = tk.Button(tab5, text='Fit ds Lp & shift_x', command=Fitting_WLC_ds_handles, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(100, 450, window=button_fit_Lp_shift_x, width=150) + + button_fit_Lc = tk.Button(tab5, text='Fit ss Lc', command=Fitting_WLC_ss_handles, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(300, 450, window=button_fit_Lc, width=90) + + button_export = tk.Button(tab5, text='Export', command=export_table, bg='palegreen2', font=('Arial', 14, 'bold')) + canvas1.create_window(100, 550, window=button_export) + + button_clear_last = tk.Button(tab5, text='Delete last', command=clear_table_last, bg='red', font=('Arial', 10, 'bold')) + canvas1.create_window(250, 520, window=button_clear_last) + + button_clear_table = tk.Button(tab5, text='Delete all', command=clear_table, bg='red', font=('Arial', 10, 'bold')) + canvas1.create_window(250, 600, window=button_clear_table) + + button_reset_parameters = tk.Button(tab5, text='Reset prmtrs', command=reset_parameters, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(400, 100, window=button_reset_parameters) + + button_start_work = tk.Button(tab5, text='Set W start', command=start_work_click, bg='lightsteelblue2', font=('Arial', 11, 'bold')) + canvas1.create_window(400, 350, window=button_start_work) + + button_end_work = tk.Button(tab5, text='Set W end', command=end_work_click, bg='lightsteelblue2', font=('Arial', 11, 'bold')) + canvas1.create_window(400, 390, window=button_end_work) + + button_rWork = tk.Button(tab5, text='rWork', command=calc_rWork, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(500, 450, window=button_rWork, width=90) + + button_rWork = 
tk.Button(tab5, text='rWork', command=calc_rWork, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(500, 450, window=button_rWork, width=90) + + button_strWork = tk.Button(tab5, text='strWork', command=calc_strWork, bg='PeachPuff', font=('Arial', 10, 'bold')) + canvas1.create_window(500, 500, window=button_strWork, width=90) + + button_next_FD = tk.Button(tab5, text='Next FD >', command=lambda: change_FD(1), bg='lightsteelblue2', font=('Arial', 10, 'bold')) + canvas1.create_window(500, 50, window=button_next_FD, width=90) + + root.bind("", next_FD_key) + root.bind("", next_FD_key) + root.bind("", previous_FD_key) + root.bind("", previous_FD_key) + root.bind("", save_key) + root.bind("", save_key) + root.bind("", start_click_key) + root.bind("", end_click_key) + root.bind("", start_work_click_key) + root.bind("", end_work_click_key) + root.bind("", zero_str_work_key) + root.bind("<;>", zero_str_work_key) + root.bind("", fit_ds_key) + root.bind("", fit_ss_key) + root.bind("", calc_rWork_key) + root.bind("", calc_strWork_key) + root.bind("", load_previous_data_key) + + button_previous_FD = tk.Button(tab5, text='< Prev. 
FD', command=lambda: change_FD(-1), bg='lightsteelblue2', font=('Arial', 10, 'bold')) + canvas1.create_window(400, 50, window=button_previous_FD, width=90) + + button_save = tk.Button(tab5, text='Save', command=write_to_table, bg='palegreen2', font=('Arial', 14, 'bold')) + canvas1.create_window(400, 500, window=button_save, width=90) + + button_export = tk.Button(tab5, text='Export model', command=export_model, bg='palegreen2', font=('Arial', 10, 'bold')) + canvas1.create_window(100, 600, window=button_export) + + button_zero = tk.Button(tab5, text='0', command=zero_str_work, bg='palegreen2', font=('Arial', 14, 'bold')) + canvas1.create_window(620, 300, window=button_zero, width=30) + + ## show the fitting parameters in a table + # create Treeview with 3 columns + cols = ('Filename', 'F1', 'F2', 'F1/2', 'Step start', 'Step end', 'Step length', 'ds Lc', 'ds Lp', 'ds St', 'ss Lc', 'ss Lp', 'ss St', 'Shift F', 'Shift x', 'Work') + listBox = ttk.Treeview(frame_table, columns=cols, show='headings', height=5) + # set column headings + for col in cols: + listBox.heading(col, text=col) + listBox.column(col, minwidth=0, width=65) + listBox.grid(row=1, column=0, columnspan=1, padx=5, pady=5) + ######### TOMATO end ############ + + ############ POTATO last part ############### # put default values into the widgets parameters(parameter_frame, default_values_HF, default_values_FIT, default_values_constantF) diff --git a/POTATO_TOMATO.py b/POTATO_TOMATO.py new file mode 100644 index 0000000..f48545f --- /dev/null +++ b/POTATO_TOMATO.py @@ -0,0 +1,19 @@ + +from matplotlib.figure import Figure + + +def plot_TOMATO(FD): + figure1 = Figure(figsize=(9, 7), dpi=100) + subplot1 = figure1.add_subplot(111) + + F = FD[:, 0] + PD_nm = FD[:, 1] + + subplot1.set_xlabel("Distance [$\\mu$m]") + subplot1.set_ylabel("Force [pN]") + subplot1.plot(PD_nm, F, color='gray') + subplot1.tick_params('both', direction='in') + subplot1.set_ylim([min(F), max(F)]) + subplot1.set_xlim([min(PD_nm) - 10, 
max(PD_nm) + 10]) + + return figure1 diff --git a/POTATO_config.py b/POTATO_config.py index ea96444..f43f357 100644 --- a/POTATO_config.py +++ b/POTATO_config.py @@ -1,4 +1,5 @@ """Copyright 2021 Helmholtz-Zentrum für Infektionsforschung GmbH""" + """default values for each data type""" default_values_HF = { @@ -6,7 +7,8 @@ 'Butterworth filter degree': '4', 'Cut-off frequency': '0.005', 'Force threshold, pN': '5', - 'Z-score': '3', + 'Z-score force': '3', + 'Z-score distance': '3', 'Step d': '10', 'Moving median window size': '800', 'STD difference threshold': '0.05', @@ -18,7 +20,8 @@ 'Butterworth filter degree': '2', 'Cut-off frequency': '0.5', 'Force threshold, pN': '5', - 'Z-score': '3', + 'Z-score force': '3', + 'Z-score distance': '3', 'Step d': '3', 'Moving median window size': '20', 'STD difference threshold': '0.05', @@ -26,15 +29,16 @@ } default_values_CSV = { - 'Downsampling rate': '2', - 'Butterworth filter degree': '4', - 'Cut-off frequency': '0.005', + 'Downsampling rate': '1', + 'Butterworth filter degree': '1', + 'Cut-off frequency': '0.01', 'Force threshold, pN': '5', - 'Z-score': '3', + 'Z-score force': '2.5', + 'Z-score distance': '3', 'Step d': '10', - 'Moving median window size': '250', + 'Moving median window size': '120', 'STD difference threshold': '0.05', - 'Data frequency, Hz': '1000' + 'Data frequency, Hz': '20' } default_values_FIT = { @@ -42,10 +46,10 @@ 'Persistance-Length ds, upper bound, nm': '80', 'Persistance-Length ds, lower bound, nm': '12', 'Persistance-Length ss, nm': '1', - 'Contour-Length ds, nm': '1256', + 'Contour-Length ds, nm': '830', 'Contour-Length ss, nm': '0', 'Stiffness ds, pN': '500', - 'Stiffness ds, upper bound, pN': '600', + 'Stiffness ds, upper bound, pN': '1500', 'Stiffness ds, lower bound, pN': '400', 'Stiffness ss, pN': '800', 'Force offset, pN': '0', diff --git a/POTATO_find_steps.py b/POTATO_find_steps.py index 8fd3a24..c92c592 100644 --- a/POTATO_find_steps.py +++ b/POTATO_find_steps.py @@ -40,7 +40,7 @@ 
def moving_median(input_data, column_number, window_size): # sorting the data based on a x times STD threshold (normal distibuted noise vs extreme values from steps) -def cut_off(input_array, column_number, mm, std, n_of_std): +def cut_off(input_array, column_number, mm, std, z_score): # sort values - inside STD region, above STD region and below STD region F_values_inside = [] PD_values_inside = [] @@ -60,17 +60,18 @@ def cut_off(input_array, column_number, mm, std, n_of_std): i = 0 for n in range(0, len(input_array), 1): - if input_array[n, column_number] > mm[int(i)] + n_of_std * std: + if input_array[n, column_number] > mm[int(i)] + z_score * std: F_dt_above.append(input_array[n, 2]) F_values_above.append(input_array[n, 0]) PD_values_above.append(input_array[n, 1]) PD_dt_above.append(input_array[n, 3]) - elif input_array[n, column_number] < mm[int(i)] - n_of_std * std: + elif input_array[n, column_number] < mm[int(i)] - z_score * std: F_dt_below.append(input_array[n, 2]) F_values_below.append(input_array[n, 0]) PD_values_below.append(input_array[n, 1]) PD_dt_below.append(input_array[n, 3]) + else: F_dt_inside.append(input_array[n, 2]) F_values_inside.append(input_array[n, 0]) @@ -88,7 +89,7 @@ def cut_off(input_array, column_number, mm, std, n_of_std): # searching for minima in the force derivation to identify unfolding events -def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): +def find_steps_F(input_settings, filename_i, Force_Distance, der_arr, orientation): global y_vector_F global F_mm2_STD2_positive global F_mm2_STD2_negative @@ -98,7 +99,7 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): STD_1 = STD(der_arr, 2) F_mm = moving_median(der_arr, 2, input_settings['window_size']) - Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score_f']) F_mm2_STD2_positive = [] 
F_mm2_STD2_negative = [] @@ -107,7 +108,7 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): while abs(STD_1 - STD(Inside, 2)) / STD_1 > input_settings['STD_diff']: F_mm = moving_median(Inside, 2, input_settings['window_size']) STD_1 = STD(Inside, 2) - Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score_f']) n_runs = n_runs + 1 if STD_1 < 0.05: @@ -115,7 +116,7 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): print('STD is', STD_1) - Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score_f']) F_mm = moving_median(Inside, 2, input_settings['window_size']) y_vector_F = [] @@ -129,8 +130,8 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): F_mm.insert(n, F_mm[last]) for i in range(len(F_mm)): - F_mm2_STD2_positive.append(F_mm[i] + input_settings['z-score'] * STD_1) - F_mm2_STD2_negative.append(F_mm[i] - input_settings['z-score'] * STD_1) + F_mm2_STD2_positive.append(F_mm[i] + input_settings['z-score_f'] * STD_1) + F_mm2_STD2_negative.append(F_mm[i] - input_settings['z-score_f'] * STD_1) # find the step points # for those steps that cross the STD2 threshold -> find the closest 0 values prior/following to the crossing one @@ -158,6 +159,7 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): PD_start_F.append(der_arr[i_start, 1]) dict1 = { "filename": filename_i, + "orientation": orientation, "Derivation of": 'Force', 'step #': n_steps, 'F1': der_arr[i_start, 0], @@ -176,7 +178,7 @@ def find_steps_F(input_settings, filename_i, Force_Distance, der_arr): # searching for maxima in the distance derivation to identify unfolding events -def find_steps_PD(input_settings, filename_i, Force_Distance, 
der_arr): +def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr, orientation): global y_vector_PD global PD_mm2_STD2_positive global PD_mm2_STD2_negative @@ -187,7 +189,7 @@ def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr): STD_1 = STD(der_arr, 3) PD_mm = moving_median(der_arr, 3, input_settings['window_size']) - Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score_d']) PD_mm2_STD2_positive = [] PD_mm2_STD2_negative = [] @@ -196,7 +198,7 @@ def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr): while abs(STD_1 - STD(Inside, 3)) / STD_1 > input_settings['STD_diff']: PD_mm = moving_median(Inside, 3, input_settings['window_size']) STD_1 = STD(Inside, 3) - Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score_d']) n_runs = n_runs + 1 if STD_1 < 0.05: @@ -204,7 +206,7 @@ def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr): print('STD is', STD_1) - Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score']) + Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score_d']) PD_mm = moving_median(Inside, 3, input_settings['window_size']) y_vector_PD = [] @@ -218,13 +220,11 @@ def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr): PD_mm.insert(n, PD_mm[last]) for i in range(len(PD_mm)): - PD_mm2_STD2_positive.append(PD_mm[i] + input_settings['z-score'] * STD_1) - PD_mm2_STD2_negative.append(PD_mm[i] - input_settings['z-score'] * STD_1) + PD_mm2_STD2_positive.append(PD_mm[i] + input_settings['z-score_d'] * STD_1) + PD_mm2_STD2_negative.append(PD_mm[i] - input_settings['z-score_d'] * STD_1) # find the step 
points # for those steps that cross the 3*STD2 threshold -> find the closest 0 values prior/following to the crossing one - # for local minima - loc_max = argrelextrema(Above[:, 3], np.greater) n_steps = 1 @@ -249,6 +249,7 @@ def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr): dict1 = { "filename": filename_i, + "orientation": orientation, "Derivation of": 'Distance', 'step #': n_steps, 'F1': der_arr[i_start, 0], diff --git a/POTATO_fitting.py b/POTATO_fitting.py index 0f51f9c..7b962ad 100644 --- a/POTATO_fitting.py +++ b/POTATO_fitting.py @@ -10,18 +10,33 @@ """define the functions used for fitting""" -def fitting_ds(filename_i, input_settings, export_data, input_fitting, i_start, Force_Distance, derivation_array, F_low): +def fitting_ds(filename_i, input_settings, export_data, input_fitting, i_start, Force_Distance, derivation_array, F_low, TOMATO_param): global model_ds, fit_ds global ds_fit_dict global f_fitting_region_ds, d_fitting_region_ds global export_fit_ds global fitting_model - start_step1 = np.where(derivation_array[:, 1] == i_start) - start_step1 = start_step1[0][0] + if TOMATO_param == 0: + start_step1 = np.where(derivation_array[:, 1] == i_start) + start_step1 = start_step1[0][0] - f_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 0] - d_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 1] + f_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 0] + d_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 1] + + elif TOMATO_param == 1: + i_start = Force_Distance[-1, 1] + f_fitting_region_ds = Force_Distance[0] + d_fitting_region_ds = Force_Distance[1] + + delta_f = f_fitting_region_ds[-1] - f_fitting_region_ds[0] + delta_d = d_fitting_region_ds[-1] - d_fitting_region_ds[0] + d_f_ratio = delta_d / delta_f + + # downsample the data used for fitting with a dD/dF ratio + while 
len(f_fitting_region_ds) > 100 * d_f_ratio: + f_fitting_region_ds = f_fitting_region_ds[::2] + d_fitting_region_ds = d_fitting_region_ds[::2] model_ds = lk.inverted_odijk("ds_part").subtract_independent_offset() + lk.force_offset("ds_part") @@ -63,7 +78,7 @@ def fitting_ds(filename_i, input_settings, export_data, input_fitting, i_start, # calculate the integral until the first unfolding step # used to calculate the work done by the machine distance_integral = np.arange(min(Force_Distance[:, 1]), i_start) - ds_integral = model_ds(distance_integral, fit_ds) + ds_integral = model_ds(distance_integral, fit_ds.params) area_ds = simps(ds_integral) print("area_ds = " + str(area_ds)) @@ -71,6 +86,8 @@ def fitting_ds(filename_i, input_settings, export_data, input_fitting, i_start, ds_fit_dict = { 'filename': filename_i, 'model': 'WLC', + 'model_ds': model_ds, + 'fit_model': fit_ds, 'log_likelihood': fit_qual, 'Lc_ds': fit_ds["ds_part/Lc"].value, 'Lp_ds': fit_ds["ds_part/Lp"].value, @@ -174,12 +191,12 @@ def fitting_ss(filename_i, input_settings, export_data, input_fitting, i_start, # calculate the integrals of the fitted functions distance_integral_fit_start = np.arange(min(Force_Distance[:, 1]), i_start) - ss_integral_start = model_ss(distance_integral_fit_start, fit_ss) + ss_integral_start = model_ss(distance_integral_fit_start, fit_ss.params) area_ss_fit_start = simps(ss_integral_start) print("area_ss_start = " + str(area_ss_fit_start)) distance_integral_fit_end = np.arange(min(Force_Distance[:, 1]), i_end) - ss_integral_end = model_ss(distance_integral_fit_end, fit_ss) + ss_integral_end = model_ss(distance_integral_fit_end, fit_ss.params) area_ss_fit_end = simps(ss_integral_end) print("area_ss_end = " + str(area_ss_fit_end)) @@ -210,7 +227,7 @@ def fitting_ss(filename_i, input_settings, export_data, input_fitting, i_start, def plot_fit(fit, start_force_ss, start_distance_ss, Force_Distance, save_folder, filename_i, start_time): distance = np.arange(min(Force_Distance[:, 
1]), max(Force_Distance[:, 1]) + 50, 2) - F_ds_model = model_ds(distance, fit_ds) + F_ds_model = model_ds(distance, fit_ds.params) legend_elements = [ Line2D([0], [0], color='k', lw=1, alpha=0.85), @@ -226,7 +243,7 @@ def plot_fit(fit, start_force_ss, start_distance_ss, Force_Distance, save_folder plt.legend(legend_elements, ['FD-Curve', 'Part used for fitting', 'Fitted WLC model']) for i in range(0, len(fit)): - F_ss_model = model_ss(distance, fit[i]) + F_ss_model = model_ss(distance, fit[i].params) plt.scatter(start_distance_ss[i], start_force_ss[i], s=4) plt.plot(distance, F_ss_model, linestyle='dashed', color='gray') diff --git a/POTATO_preprocessing.py b/POTATO_preprocessing.py index f1243a9..37687d5 100644 --- a/POTATO_preprocessing.py +++ b/POTATO_preprocessing.py @@ -13,13 +13,8 @@ def preprocess_RAW(Force, Distance, input_settings): b, a = signal.butter(input_settings['filter_degree'], input_settings['filter_cut_off']) filteredForce = signal.filtfilt(b, a, Force_ds) filteredDistance = signal.filtfilt(b, a, Distance_ds) - filteredDistance_ready = filteredDistance * 1000 - - Force_Distance = np.column_stack((filteredForce, filteredDistance_ready)) - - if Force_Distance[0, 1] > Force_Distance[-1, 1]: # reverse - Force_Distance = np.flipud(Force_Distance) + Force_Distance = np.column_stack((filteredForce, filteredDistance * 1000)) Force_Distance_um = np.column_stack((filteredForce, filteredDistance)) return Force_Distance, Force_Distance_um @@ -27,36 +22,28 @@ def preprocess_RAW(Force, Distance, input_settings): # creates a dataset from min force threshold to max force value def trim_data(FD_data, F_min): - global F_trimmed, PD_trimmed, F_low - - F_trimmed = [] - PD_trimmed = [] + F_trimmed = np.array([]) + PD_trimmed = np.array([]) + F_low = np.array([]) F_max = np.where(FD_data[:, 0] == max(FD_data[:, 0])) fi = F_max[0][0] - while FD_data[fi, 0] < FD_data[fi - 10, 0]: - fi = fi - 1 - - while FD_data[fi, 1] < FD_data[fi - 10, 1]: + while FD_data[fi, 0] > 
F_min and fi > 0: fi = fi - 1 - fi0 = fi - - fi = F_max[0][0] - # print(fi) - while FD_data[fi, 0] > F_min: - fi = fi - 1 - # print(Force_Distance[fi, 0]) - F_trimmed = FD_data[fi:fi0, 0] - PD_trimmed = FD_data[fi:fi0, 1] - F_low = FD_data[:fi, 0] + if not fi == F_max[0][0]: + F_trimmed = FD_data[fi:F_max[0][0], 0] + PD_trimmed = FD_data[fi:F_max[0][0], 1] + F_low = FD_data[:fi, 0] + elif fi == 0: + print('Could not trim this curve, data below minimum force threshold!') return F_trimmed, PD_trimmed, F_low # creates derivations for Force and Distance of the trimmed datasets -def create_derivation(input_settings, Frequency_value): +def create_derivation(input_settings, Frequency_value, F_trimmed, PD_trimmed, F_low): d_time = 1 / Frequency_value * input_settings['downsample_value'] * input_settings['step_d'] x = input_settings['step_d'] diff --git a/POTATO_processMultiH5.py b/POTATO_processMultiH5.py new file mode 100644 index 0000000..d21bd50 --- /dev/null +++ b/POTATO_processMultiH5.py @@ -0,0 +1,79 @@ +"""Copyright 2021 Helmholtz-Zentrum für Infektionsforschung GmbH""" + +import pandas as pd +import numpy as np + + +def split_H5(FD, input_settings, Frequency_value): + d_time = 1 / Frequency_value * input_settings['downsample_value'] * input_settings['step_d'] + d = Frequency_value // input_settings['downsample_value'] + d_half = int(0.5 * d) + derivation_list = [] + t = 0 + + for i in range(d_half, len(FD) - d_half): + PD_value = (FD[i + d_half, 1] + FD[i - d_half, 1]) / 2 + delta_PD = FD[i + d_half, 1] - FD[i - d_half, 1] + PD_dt = delta_PD / d_time + t = t + d_time + derivation_list.append([t, PD_value, PD_dt]) + + derivation_array = pd.DataFrame(derivation_list) + derivation_array = derivation_array.to_numpy() + + forward = [] + reverse = [] + n = [] + x = d_half + x_fw = [] + x_rv = [] + x_total = [] + + for i in range(len(derivation_array)): + if derivation_array[i, 2] > -2500 and derivation_array[i, 2] < 2500: + n.append(i + d_half) + x = len(n) + d_half + 
elif derivation_array[i, 2] > 2500: + x_fw.append(x) + x_total.append(x) + forward.append(FD[i + d_half]) + elif derivation_array[i, 2] < -2500: + x_rv.append(x) + x_total.append(x) + reverse.append(FD[i + d_half]) + + unique_fw = np.unique(x_fw) + unique_rv = np.unique(x_rv) + unique_total = np.unique(x_total) + + print('Fw:', len(unique_fw), ', Rev:', len(unique_rv), ', Together:', len(unique_total)) + + forward = np.vstack(forward) + reverse = np.vstack(reverse) + print(forward) + + fw_merge = np.column_stack((forward, x_fw)) + rv_merge = np.column_stack((reverse, x_rv)) + + fw_merge = np.split(fw_merge, np.where(np.diff(fw_merge[:, 2]))[0] + 1) + rv_merge = np.split(rv_merge, np.where(np.diff(rv_merge[:, 2]))[0] + 1) + + ############################# remove arrays that are way below average length + arr_length = 0 + count = 0 + for i in fw_merge: + arr_length += len(i) + count += 1 + for i in rv_merge: + arr_length += len(i) + count += 1 + arr_average = arr_length / count + + for i in fw_merge: + if len(i) < 0.5 * arr_average: + fw_merge.remove(i) + for i in rv_merge: + if len(i) < 0.5 * arr_average: + rv_merge.remove(i) + + return fw_merge, rv_merge diff --git a/POTATO_readme.txt b/POTATO_readme.txt index 92f73cd..85322d5 100644 --- a/POTATO_readme.txt +++ b/POTATO_readme.txt @@ -1,5 +1,3 @@ -"""Copyright 2021 Helmholtz-Zentrum für Infektionsforschung GmbH""" - *************** README - POTATO *************** This is the first version of POTATO and the pipeline is still in development. Therefore, there might be still some issues to be solved. @@ -61,6 +59,9 @@ For each analysis step, buttons are displayed in the different tabs and some of 3) CSV (F/D) Analyses all csv files in a given directory. The architecture of these files need to consist out of two columns (Force and Distance) without headers. + Force needs to be in pN. Distance can be either be in µm or nm. + The script can process forward (unfolding) as well as reverse (folding) curves. 
If the script should distinguish between them, + the reverse curves need to start with the highest distance and decrease from there. The data gathering frequency and all other parameters are derived from the user input in the GUI. **Parameters** @@ -69,14 +70,15 @@ For each analysis step, buttons are displayed in the different tabs and some of Upon changing, each parameter needs to be validated with the key. When the parameters are optimized, default parameters can be changed in the POTATO_config file, so they will be loaded when the GUI is started. + The parameters are read in once the analysis starts, and for the force-ramp analysis the parameters used are exported in JSON format. **Output** POTATO creates an "Analysis" folder with timestamp in the analysed directory. - The "Refresh" button loads the last saved image and gives a progress report. + The "Refresh" button loads the last saved image and displays the progress in the GUI. In the "Advanced Settings" tab, several export settings can be set. 1) Processed FD data: - Exports the down-sampled and filtered Force-Distance-Array in CSV format. + Exports the down-sampled and filtered Force-Distance-Array in CSV format. The exported filename consists of the original filename and the additional suffix "_smooth". 2) Plot Exports a figure (PNG) containing - the visualized processed data with and without marked unfolding events @@ -96,7 +98,7 @@ For each analysis step, buttons are displayed in the different tabs and some of Single FD-curves of all three input formats (h5-HF, h5-LF, CSV) can be displayed. **Output** - A FD-curve of the original input values, as well as the down sampled values are plotted in the GUI. This may help identify potential causes of errors. + An FD-curve of the original input values, as well as one of the down-sampled values, is plotted in the GUI. This may help identify potential causes of errors.
***Constant Force Analysis*** @@ -119,8 +121,8 @@ For each analysis step, buttons are displayed in the different tabs and some of ***Advanced Settings*** - This tab contains all the adjustable parameters in the POTATO. - The parameters are divided into several groups based on the part of analysis, in which they are used. + This tab contains all the adjustable parameters in POTATO. + The parameters are divided into several groups based on the part of the analysis in which they are used. **Preprocessing** Downsample rate - Only every nth value is taken for analysis; speeds up subsequent processing. Butterworth Filter degree - Defines the stringency of the filter. @@ -134,18 +136,18 @@ For each analysis step, buttons are displayed in the different tabs and some of moving median window size - The number of values considered for each median calculation. SD difference threshold - Statistical analysis and data sorting are iterated until the difference between two consecutive SDs is below this value. **Fitting** - "WLC+WLC" or "WLC+FJC" tick option - determines whether the unfolded regions of FD curves will be fitted with model combining two WLC models or WLC and FJC models, repectively. + "WLC+WLC" or "WLC+FJC" tick option - determines whether the unfolded regions of FD curves will be fitted with a model combining two WLC models or a WLC and an FJC model, respectively. dsLp, nm - Persistence length of the double-stranded (folded) part of the tethered construct. - dsLc, nm - Contour length of double-stranded (folded) part of the tethered construct. + dsLc, nm - Contour length of the double-stranded (folded) part of the tethered construct. dsK0, pN - Stretch modulus of double-stranded (folded) part of the tethered construct. Force_offset, pN - Force offset of a given dataset; compensates for a shift in the dataset. Distance_offset, nm - Distance offset of a given dataset; compensates for a shift in the dataset.
ssLp, nm - Persistence length of the single-stranded (unfolded) part of the tethered construct. ssLc, nm - Contour length of single-stranded (unfolded) part of the tethered construct. ssK0, pN - Stretch modulus of single-stranded (unfolded) part of the tethered construct. - + **Export** - Consists of tick options for marking the data files to be exported (ticked) or not (unticked) during the analysis. + Consists of tick options for marking the data files to be exported (ticked) or not (unticked) during the analysis. Processed FD data - Exports the down-sampled and filtered Force-Distance-Array in CSV format. The exported filename consists of the original filename and the additional suffix "_smooth". Plot - Exports a figure (PNG) containing - the visualized processed data with and without marked unfolding events - the corresponding force- and distance derivations @@ -154,5 +156,3 @@ For each analysis step, buttons are displayed in the different tabs and some of Total results - Exports all steps from both derivations in CSV format. Fitting - Exports a plot with the fitted models and a table of the fitting parameters for each section in CSV format. When 'Fitting' is not selected, the script skips all fitting steps and therefore the analysis is much faster. - -