diff --git a/POTATO.ico b/POTATO.ico new file mode 100644 index 0000000..dac880e Binary files /dev/null and b/POTATO.ico differ diff --git a/POTATO_ForceRamp.py b/POTATO_ForceRamp.py new file mode 100644 index 0000000..5ffcf4b --- /dev/null +++ b/POTATO_ForceRamp.py @@ -0,0 +1,378 @@ + +import pandas as pd +import h5py +import numpy as np +from pathlib import Path + +# relative imports +from POTATO_fitting import fitting_ds, fitting_ss, plot_fit +from POTATO_preprocessing import preprocess_RAW, trim_data, create_derivation +from POTATO_find_steps import find_steps_F, find_steps_PD, find_common_steps, calc_integral, save_figure +"""define the functions of the subprocess processing the data""" + + +# open a folder containing raw data and lead through the analysis process +def start_subprocess(analysis_folder, timestamp, Files, input_settings, input_format, export_data, input_fitting, output_q): + # empty dataframe to store all step results of all curves in the folder + total_results_steps = pd.DataFrame() + + # create dataframe to store all fitting parameters of all curves in the folder + header_fit = [ + "filename", + "model", + "log_likelihood", + "Lc_ds", + "Lp_ds", + "Lp_ds_stderr", + "St_ds", + "Lc_ss", + "Lc_ss_stderr", + "Lp_ss", + "St_ss", + "f_offset", + "d_offset", + "Work_(pN*nm)", + "Work_(kB*T)" + ] + + total_results_fit = pd.DataFrame(columns=header_fit) + + # iterate through the files in the selected folder + i = 0 + # proceed differently with h5 and csv files + while i < len(Files): + if input_format['CSV'] == 1: + df = pd.read_csv(Files[i]) + directory_i = Path(Files[i]) + filename_i = directory_i.name[:-4] + # access the raw data + Force_1x = df.to_numpy()[:, 0] + Distance_1x = df.to_numpy()[:, 1] + # accessing the data frequency from user input + Frequency_value = input_settings['data_frequency'] + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + else: + with h5py.File(Files[i], "r") as f: + directory_i = Path(Files[i]) + filename_i = directory_i.name[:-3] + + # access the raw data + if input_format['HF'] == 1: + Force_1x = f.get("Force HF/Force 1x") + Distance_1x = f.get("Distance/Piezo Distance") + # accessing the data frequency from the h5 file + Frequency_value = Force_1x.attrs['Sample rate (Hz)'] + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + + elif input_format['LF'] == 1: + load_force = f.get("Force LF/Force 1x") + Force_1x = load_force[:]['Value'][:] + load_distance = f.get("Distance/Distance 1")[:] + Distance_1x = load_distance['Value'][:] + Force_Distance = np.column_stack((Force_1x, Distance_1x)) + + # calculating the data frequency based on start- and end-time of the measurement + size_F_LF = len(Force_1x) + stop_time_F_LF = load_force.attrs['Stop time (ns)'] + timestamp_F_LF = load_force.attrs['Start time (ns)'] + + Frequency_value = size_F_LF / ((stop_time_F_LF - timestamp_F_LF) / 10**9) + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + + # Export down sampled and smoothened FD values + if export_data['export_SMOOTH'] == 1: + filename = analysis_folder + "/" + filename_i + "_smooth_" + timestamp + ".csv" + np.savetxt(filename, Force_Distance_um, delimiter=",") + else: + pass + + # trim data below specified force thresholds + F_trimmed, PD_trimmed, F_low = trim_data(Force_Distance, input_settings['F_min']) + + # create force and distance derivation of the pre-processed data to be able to identify steps + derivation_array = 
create_derivation(input_settings, Frequency_value) + + """find steps based on force derivation""" + filename_results = analysis_folder + "/" + filename_i + "_results_" + timestamp + ".csv" + + try: + results_F, PD_start_F = find_steps_F( + input_settings, + filename_i, + Force_Distance, + derivation_array + ) + + results_F_list = list(results_F) + + if export_data['export_STEPS'] == 1: + steps_results_F = pd.DataFrame(results_F_list) + with open(filename_results, 'a+') as f: + f.write('\nSteps found by force derivation:\n') + steps_results_F.to_csv(filename_results, mode='a', index=False, header=True) + else: + pass + + except: + results_F = [] + PD_start_F = [] + print("Error in finding steps for file " + str(filename_i) + '\n' 'There was an error in finding Force steps') + pass + + """find steps based on distance derivation""" + + try: + results_PD, PD_start_PD = find_steps_PD( + input_settings, + filename_i, + Force_Distance, + derivation_array + ) + + results_PD_list = list(results_PD) + + if export_data['export_STEPS'] == 1: + steps_results_PD = pd.DataFrame(results_PD_list) + with open(filename_results, 'a+') as f: + f.write('\nSteps found by distance derivation:\n') + steps_results_PD.to_csv(filename_results, mode='a', index=False, header=True) + + except: + results_PD = [] + PD_start_PD = [] + err_PD = str("Error in finding steps for file " + str(filename_i) + '\n' 'There was an error in finding Distance steps') + print(err_PD) + pass + + # save plot with FD-curve, derivations and found steps + save_figure( + export_data['export_PLOT'], + timestamp, + filename_i, + analysis_folder, + Force_Distance, + derivation_array, + F_trimmed, + PD_trimmed, + PD_start_F, + PD_start_PD + ) + + # when steps are found by force AND distance derivation, they are considered common steps + try: + common_steps = find_common_steps(results_F_list, results_PD_list) + # to match with the fitting rows (always one more than steps) put a 'step 0' as first line + common_steps_results = [{'filename': filename_i, 'Derivation of': '', 'step #': 0, 'F1': '', 'F2': '', 'Fc': '', 'step start': '', 'step end': '', 'step length': ''}] + except: + err_FCS = str("Error in finding common steps" + str(filename_i) + '\n' 'There was an error in finding common steps') + output_q.put(err_FCS) + pass + + # append common steps to the 'step 0' + if common_steps: + for x in range(len(common_steps)): + common_steps_results.append(common_steps[x]) + + # convert common steps to dataframe for export + common_steps_results = pd.DataFrame(common_steps_results) + + # export the steps into the results for ONLY this file + with open(filename_results, 'a+') as f: + f.write('\nCommon steps:\n') + common_steps_results.to_csv(filename_results, mode='a', index=False, header=True) + + # put common steps into a total_results dataframe so all steps from all files of the analysed folder can be exported together + total_results_steps = total_results_steps.append(common_steps_results, ignore_index=True, sort=False) + + else: + common_steps_results = [{'filename': filename_i, 'Derivation of': '', 'step #': 'no common steps', 'F1': '', 'F2': '', 'Fc': '', 'step start': '', 'step end': '', 'step length': ''}] + total_results_steps = total_results_steps.append(common_steps_results, ignore_index=True, sort=False) + + '''if common steps were found, try to fit FD-Curve''' + empty = { + 'filename': filename_i, + 'model': 'None', + 'log_likelihood': 'None', + 'Lc_ds': 'None', + 'Lp_ds': 'None', + 'Lp_ds_stderr': 'None', + 'St_ds': 'None', + 'f_offset': 
'None', + 'd_offset': 'None' + } + + if export_data['export_FIT'] == 1: + try: + export_fit = [] + fit = [] + start_force_ss = [] + start_distance_ss = [] + integral_ss_fit_start = [] + integral_ss_fit_end = [] + + # try to fit all parts of curve based on the common steps + try: + # fit part between start of the FD-cure up to the first common step + export_fit_ds, area_ds = fitting_ds( + filename_i, + input_settings, + export_data, + input_fitting, + float(common_steps[0]['step start']), + Force_Distance, + derivation_array, + F_low + ) + + export_fit.append(export_fit_ds) + + # fit parts after steps, when more than one common step was found, there are multiple parts to fit + if len(common_steps) > 1: + for n in range(0, len(common_steps) - 1): + # try to fit each part of the curve, if one of the parts can not be fitted, still try to fit the others + try: + fit_ss, f_fitting_region_ss, d_fitting_region_ss, export_fit_ss, area_ss_fit_start, area_ss_fit_end = fitting_ss( + filename_i, + input_settings, + export_data, + input_fitting, + float(common_steps[n]['step end']), + float(common_steps[n + 1]['step start']), + Force_Distance, 1, 1, + derivation_array, + F_low + ) + + fit.append(fit_ss) + start_force_ss.append(f_fitting_region_ss) + start_distance_ss.append(d_fitting_region_ss) + export_fit.append(export_fit_ss) + integral_ss_fit_start.append(area_ss_fit_start) + integral_ss_fit_end.append(area_ss_fit_end) + + except: + export_fit.append(empty) + pass + + # fit the last part of the curve + try: + fit_ss, f_fitting_region_ss, d_fitting_region_ss, export_fit_ss, area_ss_fit_start, area_ss_fit_end = fitting_ss( + filename_i, + input_settings, + export_data, + input_fitting, + float(common_steps[len(common_steps) - 1]['step end']), + max(derivation_array[:, 1]), + Force_Distance, 1, 1, + derivation_array, + F_low + ) + + fit.append(fit_ss) + start_force_ss.append(f_fitting_region_ss) + start_distance_ss.append(d_fitting_region_ss) + export_fit.append(export_fit_ss) + integral_ss_fit_start.append(area_ss_fit_start) + integral_ss_fit_end.append(area_ss_fit_end) + + except: + export_fit.append(empty) + pass + + '''from the fits, work put into the system is calculated''' + if common_steps: + work_per_step = [0] # in pN*nm + kT_per_step = [0] # in kT + + work_first_step, kT_1 = calc_integral( + area_ds, + integral_ss_fit_start[0], + common_steps[0]['step start'], + common_steps[0]['step end'], + common_steps[0]['F1'], + common_steps[0]['F2'] + ) + + print("Work of first step: " + str(work_first_step)) + work_per_step.append(work_first_step) + kT_per_step.append(kT_1) + + if len(common_steps) > 1: + for n in range(0, len(common_steps) - 1): + work_step_n, kT_n = calc_integral( + integral_ss_fit_end[n], + integral_ss_fit_start[n + 1], + common_steps[n + 1]['step start'], + common_steps[n + 1]['step end'], + common_steps[n + 1]['F1'], + common_steps[n + 1]['F2'] + ) + + work_per_step.append(work_step_n) + kT_per_step.append(kT_n) + + j = 0 + for dict in export_fit: + dict["Work_(pN*nm)"] = work_per_step[j] + dict["Work_(kB*T)"] = kT_per_step[j] + j += 1 + + # if no step was found, the common step index 0 is not available and will raise an IndexError. + # So in this case the fit will be performed for the whole curve from beginning to end. 
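+            # NOTE (editor): 'dict' in the work-assignment loop above shadows the
+            # Python built-in; a name such as 'fit_row' would be safer. Also note
+            # that the 'empty' placeholder dictionary only carries the ds-fit
+            # columns of header_fit, so rows for unfittable ss segments leave the
+            # ss columns blank in the exported fit table.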
+ except IndexError: + if not common_steps: + export_fit_ds, area_ds = fitting_ds( + filename_i, + input_settings, + export_data, + input_fitting, + derivation_array[-1, 1], + Force_Distance, + derivation_array, + F_low + ) + + export_fit.append(export_fit_ds) + + total_results_fit = total_results_fit.append(export_fit, ignore_index=True, sort=False) + + # create a plot for the fitted curve + plot_fit(fit, start_force_ss, start_distance_ss, Force_Distance, analysis_folder, filename_i, timestamp) + + except: + print('Something went wrong with fitting') + pass + + if i == 0: + print('\nHard work ahead!\n') + output_q.put('Hard work ahead!') + elif i == int(len(Files) / 2): + print('\nHalf way there!\n') + output_q.put('Half way there!') + print() + elif i == len(Files) - 1: + print('\nAlmost there!\n') + output_q.put('Almost there!') + elif i == len(Files): + print('Analysis finished! \nProgram can be closed.') + output_q.put('Analysis finished! \nProgram can be closed.') + + i = i + 1 + print('done', i, 'from', len(Files)) + out_progress = str('Done ' + str(i) + ' from ' + str(len(Files))) + output_q.put(out_progress) + + print(filename_i) + output_q.put(filename_i) + + '''after folder analysis is done, export total results (all steps + fit parameters) in one file''' + if export_data['export_TOTAL'] == 1: + filename_total_results = analysis_folder + '/total_results_' + timestamp + '.csv' + + with open(filename_total_results, 'w') as f: + f.write('Common steps from all curves of the folder:\n') + + results_total_total = pd.concat([total_results_steps, total_results_fit], axis=1) + results_total_total.to_csv((filename_total_results), mode='a', index=False) + else: + pass diff --git a/POTATO_GUI.py b/POTATO_GUI.py new file mode 100644 index 0000000..b64a1e2 --- /dev/null +++ b/POTATO_GUI.py @@ -0,0 +1,905 @@ +""" POTATO -- 2021-10-14 -- Version 0.1 + Developed by Lukáš Pekárek and Stefan Buck at the Helmholtz Institute for RNA-based Infection Research + In the research group REMI - Recoding Mechanisms in Infections + Supervisor - Jun. Prof. Neva Caliskan """ +""" This script processes Force-Distance Optical Tweezers data in an automated way, to find unfolding events """ +""" The script is developed to handle h5 raw data, produced from the C-Trap OT machine from Lumicks, + as well as any other FD data prepared in a csv file (2 columns: Force(pN) - Distance(um)) """ +""" Furthermore the script can analyse single constant force files """ +""" The parameters can be changed in the GUI before each run. 
+ Alternatively they can be changed permanently in the POTATO_config file""" + +import tkinter as tk +from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg +from matplotlib.figure import Figure +from matplotlib.lines import Line2D +from tkinter import filedialog +from tkinter import ttk +from tkinter import messagebox +from PIL import ImageTk, Image +import pandas as pd +import os +import h5py +import glob +import time +import multiprocessing as mp +import json + +# relative imports +from POTATO_ForceRamp import start_subprocess +from POTATO_preprocessing import preprocess_RAW +from POTATO_config import default_values_HF, default_values_LF, default_values_CSV, default_values_FIT, default_values_constantF +from POTATO_constantF import get_constantF, display_constantF, fit_constantF + +# To avoid blurry GUI - DPI scaling +import ctypes +awareness = ctypes.c_int() +errorCode = ctypes.windll.shcore.GetProcessDpiAwareness(0, ctypes.byref(awareness)) +errorCode = ctypes.windll.shcore.SetProcessDpiAwareness(1) + + +"""define the functions used in the GUI""" + + +# get settings, get folder directory, create analysis results folder +def start_analysis(): + global p0 + global analysis_folder + global image_number + # check user input + input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + image_number = [] + # ask wich directory should be analysed + folder = filedialog.askdirectory() + root.title('POTATO -- ' + str(folder)) + + # decide which input format was choosen + if input_format['CSV'] == 1: + folder_path = str(folder + "/*.csv") + else: + folder_path = str(folder + "/*.h5") + + Files = glob.glob(folder_path) + print('files to analyse', len(Files)) + + # print number of files to analyse, if no files found give an error + output_window.insert("end", 'Files to analyse: ' + str(len(Files)) + "\n") + output_window.see("end") + if not len(Files) == 0: + output_window.insert("end", 'Analysis in progress. Please do not close the program! \n') + + # print starting time of the analysis + timestamp = time.strftime("%Y%m%d-%H%M%S") + print("Timestamp: " + timestamp) + output_window.insert("end", 'Start of analysis: ' + str(timestamp) + "\n") + output_window.see("end") + + # create a folder for the analysis results + analysis_folder = str(folder + '/Analysis_' + timestamp) + os.mkdir(analysis_folder) + + # export configuration file with used parameters + export_settings(analysis_folder, timestamp, input_settings, input_fitting) + + # start analysis in a new process + p0 = mp.Process(target=start_subprocess, name='Process-0', args=( + analysis_folder, + timestamp, + Files, + input_settings, + input_format, + export_data, + input_fitting, + output_q, + )) + + p0.daemon = True + p0.start() + + else: + output_window.insert("end", 'No file of the selected data type in the folder! 
\n') + output_window.see("end") + + +# display default values in the GUI +def parameters(frame, default_values, default_fit, default_constantF): + downsample_value1.delete(0, "end") + downsample_value1.insert("end", default_values['Downsampling rate']) + downsample_value2.delete(0, "end") + downsample_value2.insert("end", default_values['Downsampling rate']) + Filter_degree1.delete(0, "end") + Filter_degree1.insert("end", default_values['Butterworth filter degree']) + Filter_degree2.delete(0, "end") + Filter_degree2.insert("end", default_values['Butterworth filter degree']) + Filter_cut_off1.delete(0, "end") + Filter_cut_off1.insert("end", default_values['Cut-off frequency']) + Filter_cut_off2.delete(0, "end") + Filter_cut_off2.insert("end", default_values['Cut-off frequency']) + Force_Min1.delete(0, "end") + Force_Min1.insert("end", default_values['Force threshold, pN']) + Force_Min2.delete(0, "end") + Force_Min2.insert("end", default_values['Force threshold, pN']) + STD1_threshold1.delete(0, "end") + STD1_threshold1.insert("end", default_values['Z-score']) + STD1_threshold2.delete(0, "end") + STD1_threshold2.insert("end", default_values['Z-score']) + step_d_value.delete(0, "end") + step_d_value.insert("end", str(default_values['Step d'])) + window_size_value.delete(0, "end") + window_size_value.insert("end", str(default_values['Moving median window size'])) + STD_difference_value.delete(0, "end") + STD_difference_value.insert("end", str(default_values['STD difference threshold'])) + Frequency_value.delete(0, "end") + Frequency_value.insert("end", str(default_values['Data frequency, Hz'])) + + dsLp.delete(0, "end") + dsLp.insert("end", str(default_fit['Persistance-Length ds, nm'])) + dsLp_up.delete(0, "end") + dsLp_up.insert("end", str(default_fit['Persistance-Length ds, upper bound, nm'])) + dsLp_low.delete(0, "end") + dsLp_low.insert("end", str(default_fit['Persistance-Length ds, lower bound, nm'])) + ssLp.delete(0, "end") + ssLp.insert("end", str(default_fit['Persistance-Length ss, nm'])) + dsLc.delete(0, "end") + dsLc.insert("end", str(default_fit['Contour-Length ds, nm'])) + ssLc.delete(0, "end") + ssLc.insert("end", str(default_fit['Persistance-Length ss, nm'])) + stiff_ds.delete(0, "end") + stiff_ds.insert("end", str(default_fit['Stiffness ds, pN'])) + stiff_ds_up.delete(0, "end") + stiff_ds_up.insert("end", str(default_fit['Stiffness ds, upper bound, pN'])) + stiff_ds_low.delete(0, "end") + stiff_ds_low.insert("end", str(default_fit['Stiffness ds, lower bound, pN'])) + stiff_ss.delete(0, "end") + stiff_ss.insert("end", str(default_fit['Stiffness ss, pN'])) + f_off.delete(0, "end") + f_off.insert("end", str(default_fit['Force offset, pN'])) + f_off_up.delete(0, "end") + f_off_up.insert("end", str(default_fit['Force offset, upper bound, pN'])) + f_off_low.delete(0, "end") + f_off_low.insert("end", str(default_fit['Force offset, lower bound, pN'])) + d_off.delete(0, "end") + d_off.insert("end", str(default_fit['Distance offset, nm'])) + d_off_up.delete(0, "end") + d_off_up.insert("end", str(default_fit['Distance offset, upper bound, nm'])) + d_off_low.delete(0, "end") + d_off_low.insert("end", str(default_fit['Distance offset, lower bound, nm'])) + + x_min.delete(0, "end") + x_min.insert("end", str(default_constantF['x min'])) + x_max.delete(0, "end") + x_max.insert("end", str(default_constantF['x max'])) + y_min.delete(0, "end") + y_min.insert("end", str(default_constantF['y min'])) + y_max.delete(0, "end") + y_max.insert("end", str(default_constantF['y max'])) + 
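+    # NOTE (editor): ssLc above is populated from
+    # default_fit['Persistance-Length ss, nm'], the same key used for ssLp.
+    # Given the dsLc/ssLc pairing and the 'Contour-Length ss, nm' entry defined
+    # in POTATO_config, that key is presumably the intended source; as written,
+    # the ss contour length starts out with the ss persistence-length default.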
number_gauss.delete(0, "end") + number_gauss.insert("end", str(default_constantF['Number gauss'])) + mean_gauss.delete(0, "end") + mean_gauss.insert("end", str(default_constantF['Mean'])) + STD_gauss.delete(0, "end") + STD_gauss.insert("end", str(default_constantF['STD'])) + amplitude_gauss.delete(0, "end") + amplitude_gauss.insert("end", str(default_constantF['Amplitude'])) + + +# update value that has been changed (needs event ) +def user_input(event, param1, param2): + new_param = param1.get() + param1.delete(0, "end") + param2.delete(0, "end") + param1.insert("end", new_param) + param2.insert("end", new_param) + + +# get all settings from the user input before start of the analysis +def check_settings(): + input_settings = { + 'downsample_value': int(downsample_value2.get()), + 'filter_degree': int(Filter_degree2.get()), + 'filter_cut_off': float(Filter_cut_off2.get()), + 'F_min': float(Force_Min2.get()), + 'step_d': int(step_d_value.get()), + 'z-score': float(STD1_threshold2.get()), + 'window_size': int(window_size_value.get()), + 'data_frequency': float(Frequency_value.get()), + 'STD_diff': float(STD_difference_value.get()) + } + + input_format = { + 'HF': check_box_HF.get(), + 'LF': check_box_LF.get(), + 'CSV': check_box_CSV.get() + } + + export_data = { + 'export_SMOOTH': check_box_smooth_data.get(), + 'export_PLOT': check_box_plot.get(), + 'export_STEPS': check_box_steps.get(), + 'export_TOTAL': check_box_total_results.get(), + 'export_FIT': check_box_fitting.get() + } + + input_fitting = { + 'WLC+WLC': int(check_box_WLC.get()), + 'WLC+FJC': int(check_box_FJC.get()), + 'lp_ds': float(dsLp.get()), + 'lp_ds_up': float(dsLp_up.get()), + 'lp_ds_low': float(dsLp_low.get()), + 'lc_ds': float(dsLc.get()), + 'lp_ss': float(ssLp.get()), + 'lc_ss': float(ssLc.get()), + 'ds_stiff': float(stiff_ds.get()), + 'ds_stiff_up': float(stiff_ds_up.get()), + 'ds_stiff_low': float(stiff_ds_low.get()), + 'ss_stiff': float(stiff_ss.get()), + 'offset_f': float(f_off.get()), + 'offset_f_up': float(f_off_up.get()), + 'offset_f_low': float(f_off_low.get()), + 'offset_d': float(d_off.get()), + 'offset_d_up': float(d_off_up.get()), + 'offset_d_low': float(d_off_low.get()) + } + + input_constantF = { + 'x min': int(x_min.get()), + 'x max': int(x_max.get()), + 'y min': int(y_min.get()), + 'y max': int(y_max.get()), + 'Number gauss': int(number_gauss.get()), + 'Mean': mean_gauss.get(), + 'STD': STD_gauss.get(), + 'Amplitude': amplitude_gauss.get() + } + + return input_settings, input_format, export_data, input_fitting, input_constantF + + +def export_settings(analysis_path, timestamp, input_1, input_2): + with open(str(analysis_path + '/parameters_' + timestamp + '.txt'), 'w') as config_used: + config_used.write('Data processing:\n') + config_used.write(json.dumps(input_1, indent=4, sort_keys=False)) + config_used.write('\n\n') + config_used.write('Fitting parameters:\n') + config_used.write(json.dumps(input_2, indent=4, sort_keys=False)) + + +# Looks for output of the subprocess +def refresh(): + global new_image + while output_q.empty() is False: + output = output_q.get() + output_window.insert("end", "\n" + output + "\n") + output_window.see("end") + try: + images = str(analysis_folder + "/*plot*.png") + list_images = glob.glob(images) + img = Image.open(list_images[-1]) + resized = img.resize((1000, 650), Image.ANTIALIAS) + new_image = ImageTk.PhotoImage(resized) + figure_frame.create_image(0, 0, image=new_image, anchor="nw") + except: + pass + + +def readme(): + with open("POTATO_readme.txt", "r") as f: + 
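+        # NOTE (editor): like root.iconbitmap('POTATO.ico') further below, this
+        # relative path assumes the program is started from the repository
+        # directory; running from elsewhere raises FileNotFoundError.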
help_text = f.read() + help_window = tk.Toplevel(root) + help_window.title("Readme") + text = tk.Text(help_window, height=25, width=200) + text.grid(row=0, column=0, sticky="nw") + text.insert("end", help_text) + + +# display a single h5 file (tab2) +def getRAW_File_h5(): + input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + import_file_path = filedialog.askopenfilename() + + with h5py.File(import_file_path, "r") as raw_data: + # access the raw data + if input_format['HF'] == 1: + Force_1x = raw_data.get("Force HF/Force 1x") + Distance_1x = raw_data.get("Distance/Piezo Distance") + + elif input_format['LF'] == 1: + load_force = raw_data.get("Force LF/Force 1x") + Force_1x = load_force[:]['Value'][:] + load_distance = raw_data.get("Distance/Distance 1")[:] + Distance_1x = load_distance['Value'][:] + print(Force_1x, Distance_1x) + FD, FD_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + display_RAW_FD(FD[:, 0], FD[:, 1], Force_1x[::input_settings['downsample_value']], Distance_1x[::input_settings['downsample_value']] * 1000) + + +# display a single csv file (tab2) +def getRAW_File_csv(): + if not check_box_CSV.get() == 1: + check_box_CSV.set(value=1) + select_box(check_box_CSV, check_box_HF, check_box_LF) + parameters(parameter_frame, default_values_CSV, default_values_FIT, default_values_constantF) + parameters(tab3, default_values_CSV, default_values_FIT, default_values_constantF) + else: + pass + + input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + import_file_path = filedialog.askopenfilename() + + df = pd.read_csv(import_file_path) + + Force_HF_1x = df.to_numpy()[:, 0] + Distance_HF_1x = df.to_numpy()[:, 1] + + FD, FD_um = preprocess_RAW(Force_HF_1x, Distance_HF_1x, input_settings) + display_RAW_FD(FD[:, 0], FD[:, 1], Force_HF_1x[::input_settings['downsample_value']], Distance_HF_1x[::input_settings['downsample_value']] * 1000) + + +# create the plot for tab2 +def display_RAW_FD(processed_F, processed_D, raw_F, raw_D): + single_fd = Figure(figsize=(10, 6), dpi=100) + subplot1 = single_fd.add_subplot(111) + + legend_elements = [ + Line2D([0], [0], color='C0', lw=4), + Line2D([0], [0], color='C1', lw=4) + ] + + subplot1.set_xlabel("Distance (nm)") + subplot1.set_ylabel("Force (pN)") + subplot1.plot(raw_D, raw_F, alpha=0.8, color='C0', zorder=0) + subplot1.scatter(processed_D, processed_F, marker='.', s=0.1, linewidths=None, alpha=1, color='C1', zorder=1) + subplot1.legend(legend_elements, ['Downsampled FD-Data', 'Filtered FD-Data']) + + figure_raw = FigureCanvasTkAgg(single_fd, figure_frame2) + figure_raw.get_tk_widget().grid(row=0, column=0, sticky='wens') + + tabControl.select(tab2) + + +def start_constantF(): + input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + Force_Distance, Force_Distance_um, frequency, filename, analysis_path, timestamp = get_constantF(input_settings, input_format, input_constantF) + fig_constantF, hist_D, filteredDistance_ready = display_constantF(Force_Distance, Force_Distance_um, frequency, input_settings, input_constantF) + os.mkdir(analysis_path) + export_settings(analysis_path, timestamp, input_settings, input_constantF) + fig_constantF = fit_constantF(hist_D, Force_Distance, filteredDistance_ready, frequency, input_settings, input_constantF, filename, timestamp) + fig_constantF_tk = FigureCanvasTkAgg(fig_constantF, figure_frame_tab4) + fig_constantF_tk.get_tk_widget().grid(row=0, column=0, sticky='wens') + + 
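+    # NOTE (editor): order matters in start_constantF() above: get_constantF()
+    # asks for the file and preprocesses it, display_constantF() builds the
+    # distance histogram, the analysis folder is only created afterwards, and
+    # fit_constantF() rebinds fig_constantF to the fitted figure before it is
+    # embedded and tab4 is brought to the front.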
tabControl.select(tab4) + + +def show_constantF(): + input_settings, input_format, export_data, input_fitting, input_constantF = check_settings() + Force_Distance, Force_Distance_um, frequency, filename, analysis_path, timestamp = get_constantF(input_settings, input_format, input_constantF) + fig_constantF, hist_D, filteredDistance_ready = display_constantF(Force_Distance, Force_Distance_um, frequency, input_settings, input_constantF) + fig_constantF_tk = FigureCanvasTkAgg(fig_constantF, figure_frame_tab4) + fig_constantF_tk.get_tk_widget().grid(row=0, column=0, sticky='wens') + + tabControl.select(tab4) + + +def on_closing(): + # makes sure all python processes/loops are cancelled before exiting + if messagebox.askokcancel("Quit", "Do you really want to quit?"): + root.quit() + + +""" start the main process and Tkinter application """ +if __name__ == '__main__': + mp.freeze_support() + root = tk.Tk() + root.iconbitmap('POTATO.ico') + root.title("POTATO -- Practical Optical Tweezers Analysis TOol") + + output_q = mp.Queue() + + # create a drop down menu + drop_down_menu = tk.Menu(root) + root.config(menu=drop_down_menu) + + # first drop down possibility: File + file_menu = tk.Menu(drop_down_menu, tearoff=0) + drop_down_menu.add_cascade(label='File', menu=file_menu) + file_menu.add_command(label='Analyse folder (FD curves)', command=start_analysis) + file_menu.add_command(label='Display single FD curve (h5)', command=getRAW_File_h5) + file_menu.add_command(label='Display single FD curve (csv)', command=getRAW_File_csv) + file_menu.add_separator() + file_menu.add_command(label='Display constant force', command=show_constantF) + file_menu.add_command(label='Fit constant force', command=start_constantF) + + # second drop down possibility: Settings + settings_menu = tk.Menu(drop_down_menu, tearoff=0) + drop_down_menu.add_cascade(label='Settings', menu=settings_menu) + settings_menu.add_command(label='Set advanced settings', command=lambda: tabControl.select(tab3)) + + # third drop down possibility: Help + help_menu = tk.Menu(drop_down_menu, tearoff=0) + drop_down_menu.add_cascade(label='Help', menu=help_menu) + help_menu.add_command(label='Readme', command=readme) + + # Create different GUI tabs + tabControl = ttk.Notebook(root) + tabControl.grid(row=0, column=0, padx=2, pady=2) + + tab1 = ttk.Frame(tabControl, width=800, height=600) + tab2 = ttk.Frame(tabControl, width=800, height=600) + tab3 = ttk.Frame(tabControl, width=800, height=600) + tab4 = ttk.Frame(tabControl, width=800, height=600) + + tab1.grid(row=0, column=0, padx=2, pady=2) + tab2.grid(row=0, column=0, padx=2, pady=2) + tab3.grid(row=0, column=0, padx=2, pady=2) + tab4.grid(row=0, column=0, padx=2, pady=2) + + # ATTENTION - tab3 and tab4 are displayed the other way round in the GUI + tabControl.add(tab1, text="Folder Analysis") + tabControl.add(tab2, text="Show Single File") + tabControl.add(tab4, text="Constant Force Analysis") + tabControl.add(tab3, text="Advanced Settings") + + """ divide the tab1 into frames """ + # output window + output_frame = tk.Frame(tab1, height=50) + output_frame.grid(row=0, column=0) + output_window = tk.Text(output_frame, height=6, width=115) + output_window.grid(row=0, column=0) + output_window.insert( + "end", + "Welcome to POTATO! 
\n" + "Please make sure to select the right datatype -----------------------------------------------------------------> \n" + "Parameters should be adjusted and validated with .\n" + "Folders with multiple files can be analysed at once.\n" + ) + + refresh_button = tk.Button( + output_frame, + text='Refresh', + command=refresh, + bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 7 bold', + height=3, + width=6, + cursor="exchange" + ) + + refresh_button.grid(row=0, column=1, padx=5) + + # check boxes + check_box = tk.Frame(tab1) + check_box.grid(row=0, column=1) + + def select_box(check_box_1, check_box_2, check_box_3): + if check_box_1.get() == 1: + check_box_2.set(value=0) + check_box_3.set(value=0) + elif check_box_1.get() == 0 and check_box_2.get() == 0 and check_box_3.get() == 0: + check_box_1.set(value=1) + + check_box_HF = tk.IntVar(value=1) + check_box_LF = tk.IntVar() + check_box_CSV = tk.IntVar() + + check1 = tk.Checkbutton( + check_box, + text="High Frequency (Piezo Distance)", + variable=check_box_HF, + command=lambda: [select_box(check_box_HF, check_box_LF, check_box_CSV), parameters(parameter_frame, default_values_HF, default_values_FIT, default_values_constantF)] + ).grid(row=0, column=0, sticky='W') + + check2 = tk.Checkbutton( + check_box, + text="Low Frequency", + variable=check_box_LF, + command=lambda: [select_box(check_box_LF, check_box_HF, check_box_CSV), parameters(parameter_frame, default_values_LF, default_values_FIT, default_values_constantF)] + ).grid(row=1, column=0, sticky='W') + + check3 = tk.Checkbutton( + check_box, + text="CSV (F/D)", + variable=check_box_CSV, + command=lambda: [select_box(check_box_CSV, check_box_HF, check_box_LF), parameters(parameter_frame, default_values_CSV, default_values_FIT, default_values_constantF)] + ).grid(row=2, column=0, sticky='W') + + figure_frame = tk.Canvas(tab1, height=650, width=1000, borderwidth=1, relief='ridge') + figure_frame.grid(row=1, column=0) + + parameter_frame = tk.Frame(tab1) + parameter_frame.grid(row=1, column=1, sticky='NE') + + """ parameter frame """ + Cluster_preprocessing = tk.Label(parameter_frame, text='PREPROCESSING', font='Helvetica 9 bold') + Label_downsample = tk.Label(parameter_frame, text='Downsampling rate') + Label_Filter1 = tk.Label(parameter_frame, text='Butterworth filter degree') + Label_Filter2 = tk.Label(parameter_frame, text='Cut-off frequency') + Label_ForceMin = tk.Label(parameter_frame, text='Force threshold, pN') + Cluster_statistics = tk.Label(parameter_frame, text='STATISTICS', font='Helvetica 9 bold') + Label_STD_1 = tk.Label(parameter_frame, text='Z-score') + + downsample_value1 = tk.Entry(parameter_frame) + downsample_value1.bind("", lambda event: user_input(event, downsample_value1, downsample_value2)) + + Filter_degree1 = tk.Entry(parameter_frame) + Filter_degree1.bind("", lambda event: user_input(event, Filter_degree1, Filter_degree2)) + + Filter_cut_off1 = tk.Entry(parameter_frame) + Filter_cut_off1.bind("", lambda event: user_input(event, Filter_cut_off1, Filter_cut_off2)) + + Force_Min1 = tk.Entry(parameter_frame) + Force_Min1.bind("", lambda event: user_input(event, Force_Min1, Force_Min2)) + + STD1_threshold1 = tk.Entry(parameter_frame) + STD1_threshold1.bind("", lambda event: user_input(event, STD1_threshold1, STD1_threshold2)) + + Cluster_preprocessing.grid(row=0, column=0, padx=2, pady=(20, 2)) + Label_downsample.grid(row=1, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + downsample_value1.grid(row=1, column=1, padx=2, pady=2) + + Label_Filter1.grid(row=2, 
column=0, padx=2, pady=2) + Filter_degree1.grid(row=2, column=1, padx=2, pady=2) + + Label_Filter2.grid(row=3, column=0, padx=2, pady=2) + Filter_cut_off1.grid(row=3, column=1, padx=2, pady=2) + + Label_ForceMin.grid(row=4, column=0, padx=2, pady=2) + Force_Min1.grid(row=4, column=1, padx=2, pady=2) + + Cluster_statistics.grid(row=5, column=0, padx=2, pady=(20, 2)) + Label_STD_1.grid(row=6, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + STD1_threshold1.grid(row=6, column=1, padx=2, pady=2) + + BUTTON1 = tk.Button( + parameter_frame, + text='Select Folder to Analyse!', + command=start_analysis, + bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 12 bold', + height=2, + width=20 + ) + + BUTTON1.grid(row=9, column=0, columnspan=2, pady=125) + + """organize tab2""" + figure_frame2 = tk.Canvas(tab2, height=650, width=650, borderwidth=1, relief='ridge') + figure_frame2.grid(row=0, column=0) + + parameter_frame2 = tk.Frame(tab2) + parameter_frame2.grid(row=0, column=1, sticky='NE') + + BUTTON2 = tk.Button( + parameter_frame2, + text='Open h5 file', + command=getRAW_File_h5, + bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 11 bold', + height=1, + width=15 + ) + + BUTTON2.grid(row=0, column=0, pady=20, sticky='E') + + BUTTON3 = tk.Button( + parameter_frame2, + text='Open csv file', + command=getRAW_File_csv, + bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 11 bold', + height=1, + width=15 + ) + + BUTTON3.grid(row=1, column=0, pady=20, sticky='E') + + """organize tab3 """ + frame1 = tk.Frame(tab3, borderwidth=1, relief='ridge') + frame1.grid(row=0, column=0) + frame2 = tk.Frame(tab3, borderwidth=1, relief='ridge') + frame2.grid(row=0, column=1, sticky='N', padx=(50, 20)) + frame3 = tk.Frame(tab3, borderwidth=1, relief='ridge') + frame3.grid(row=0, column=2, sticky='N', padx=(50, 20)) + + """ parameters in advanced settings """ + Cluster_preprocessing = tk.Label(frame1, text='PREPROCESSING', font='Helvetica 9 bold') + Label_downsample = tk.Label(frame1, text='Downsampling rate') + Label_Filter1 = tk.Label(frame1, text='Butterworth filter degree') + Label_Filter2 = tk.Label(frame1, text='Cut-off frequency') + Label_ForceMin = tk.Label(frame1, text='Force threshold, pN') + Cluster_derivation = tk.Label(frame1, text="DERIVATION", font='Helvetica 9 bold') + Label_step_d = tk.Label(frame1, text='Step d') + Label_Frequency = tk.Label(frame1, text='Data frequency, Hz') + Cluster_statistics = tk.Label(frame1, text='STATISTICS', font='Helvetica 9 bold') + Label_STD_1 = tk.Label(frame1, text='Z-score') + Label_window_size = tk.Label(frame1, text='Moving median window size') + Label_STD_difference = tk.Label(frame1, text='SD difference threshold') + + downsample_value2 = tk.Entry(frame1) + downsample_value2.bind("", lambda event: user_input(event, downsample_value2, downsample_value1)) + + Filter_degree2 = tk.Entry(frame1) + Filter_degree2.bind("", lambda event: user_input(event, Filter_degree2, Filter_degree1)) + + Filter_cut_off2 = tk.Entry(frame1) + Filter_cut_off2.bind("", lambda event: user_input(event, Filter_cut_off2, Filter_cut_off1)) + + Force_Min2 = tk.Entry(frame1) + Force_Min2.bind("", lambda event: user_input(event, Force_Min2, Force_Min1)) + + STD1_threshold2 = tk.Entry(frame1) + STD1_threshold2.bind("", lambda event: user_input(event, STD1_threshold2, STD1_threshold1)) + + step_d_value = tk.Entry(frame1) + window_size_value = tk.Entry(frame1) + STD_difference_value = tk.Entry(frame1) + Frequency_value = tk.Entry(frame1) + + 
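+    # NOTE (editor): the empty first arguments of the .bind("", ...) calls above
+    # and on the tab1 entries look like extraction damage -- Entry.bind()
+    # expects an event sequence such as "<Return>", which would also match the
+    # stripped "(needs event )" comment on user_input() and the truncated
+    # "validated with ." line in the welcome text.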
Cluster_preprocessing.grid(row=0, column=0, padx=2, pady=(20, 2)) + Label_downsample.grid(row=1, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + downsample_value2.grid(row=1, column=1, padx=(0, 20), pady=2) + + Label_Filter1.grid(row=2, column=0, padx=2, pady=2) + Filter_degree2.grid(row=2, column=1, padx=(0, 20), pady=2) + + Label_Filter2.grid(row=3, column=0, padx=2, pady=2) + Filter_cut_off2.grid(row=3, column=1, padx=(0, 20), pady=2) + + Label_ForceMin.grid(row=4, column=0, padx=2, pady=2) + Force_Min2.grid(row=4, column=1, padx=(0, 20), pady=2) + + Cluster_derivation.grid(row=5, column=0, padx=2, pady=(20, 2)) + Label_step_d.grid(row=6, column=0, padx=2, pady=2) + step_d_value.grid(row=6, column=1, padx=(0, 20), pady=2) + + Label_Frequency.grid(row=7, column=0, padx=2, pady=2) + Frequency_value.grid(row=7, column=1, padx=(0, 20), pady=2) + + Cluster_statistics.grid(row=8, column=0, padx=2, pady=(20, 2)) + Label_STD_1.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + STD1_threshold2.grid(row=9, column=1, padx=(0, 20), pady=2) + + Label_window_size.grid(row=11, column=0, padx=2, pady=2) + window_size_value.grid(row=11, column=1, padx=(0, 20), pady=2) + + Label_STD_difference.grid(row=12, column=0, padx=2, pady=2) + STD_difference_value.grid(row=12, column=1, padx=(0, 20), pady=2) + + """ Output settings """ + check_box_smooth_data = tk.IntVar(value=1) + check_box_plot = tk.IntVar(value=1) + check_box_steps = tk.IntVar(value=1) + check_box_total_results = tk.IntVar(value=1) + check_box_fitting = tk.IntVar(value=1) + + Label_export = tk.Label(frame2, text="Select exported data", font='Helvetica 9 bold').grid(row=0, column=0, padx=20, pady=20) + + check_1 = tk.Checkbutton( + frame2, + text="Processed FD data", + variable=check_box_smooth_data, + ).grid(row=1, column=0, sticky='W') + + check_2 = tk.Checkbutton( + frame2, + text="Plot", + variable=check_box_plot, + ).grid(row=2, column=0, sticky='W') + + check_3 = tk.Checkbutton( + frame2, + text="Steps found", + variable=check_box_steps, + ).grid(row=3, column=0, sticky='W') + + check_4 = tk.Checkbutton( + frame2, + text="Total results (All steps from all files)", + variable=check_box_total_results, + ).grid(row=4, column=0, sticky='W') + + check_5 = tk.Checkbutton( + frame2, + text="Fitting", + variable=check_box_fitting, + ).grid(row=5, column=0, sticky='W') + + """ Fitting parameters """ + Cluster_fitting = tk.Label(frame3, text='FITTING', font='Helvetica 9 bold') + check_box_WLC = tk.IntVar(value=1) + check_box_FJC = tk.IntVar(value=0) + Label_dsLp = tk.Label(frame3, text='dsLp, nm') + Label_dsLp_up = tk.Label(frame3, text='dsLp, upper bound, nm') + Label_dsLp_low = tk.Label(frame3, text='dsLp, lower bound, nm') + Label_dsLc = tk.Label(frame3, text='dsLc, nm') + Label_ssLp = tk.Label(frame3, text='ssLp, nm') + Label_ssLc = tk.Label(frame3, text='ssLc, nm') + Label_stiffness_ds = tk.Label(frame3, text='dsK0, pN') + Label_stiffness_ds_up = tk.Label(frame3, text='dsK0, upper bound, pN') + Label_stiffness_ds_low = tk.Label(frame3, text='dsK0, lower bound, pN') + Label_stiffness_ss = tk.Label(frame3, text='ssK0, pN') + Label_f_offset = tk.Label(frame3, text='Force offset, pN') + Label_f_offset_up = tk.Label(frame3, text='Force offset, upper bound, pN') + Label_f_offset_low = tk.Label(frame3, text='Force offset, lower bound, pN') + Label_d_offset = tk.Label(frame3, text='Distance offset, nm') + Label_d_offset_up = tk.Label(frame3, text='Distance offset, upper bound, nm') + Label_d_offset_low = tk.Label(frame3, text='Distance offset, 
lower bound, nm') + + dsLp = tk.Entry(frame3) + dsLp_up = tk.Entry(frame3) + dsLp_low = tk.Entry(frame3) + dsLc = tk.Entry(frame3) + ssLp = tk.Entry(frame3) + ssLc = tk.Entry(frame3) + stiff_ds = tk.Entry(frame3) + stiff_ds_up = tk.Entry(frame3) + stiff_ds_low = tk.Entry(frame3) + stiff_ss = tk.Entry(frame3) + f_off = tk.Entry(frame3) + f_off_up = tk.Entry(frame3) + f_off_low = tk.Entry(frame3) + d_off = tk.Entry(frame3) + d_off_up = tk.Entry(frame3) + d_off_low = tk.Entry(frame3) + + Cluster_fitting.grid(row=0, column=0, padx=20, pady=20) + + check_WLC = tk.Checkbutton( + frame3, + text="WLC+WLC", + variable=check_box_WLC, + command=lambda: [check_box_WLC.set(value=1), check_box_FJC.set(value=0)] + ).grid(row=1, column=0, sticky='W', pady=20) + + check_FJC = tk.Checkbutton( + frame3, + text="WLC+FJC", + variable=check_box_FJC, + command=lambda: [check_box_WLC.set(value=0), check_box_FJC.set(value=1)] + ).grid(row=1, column=1, sticky='W', pady=20) + + Label_dsLp.grid(row=2, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + dsLp.grid(row=2, column=1, padx=(0, 20), pady=2) + + Label_dsLp_up.grid(row=3, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + dsLp_up.grid(row=3, column=1, padx=(0, 20), pady=2) + + Label_dsLp_low.grid(row=4, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + dsLp_low.grid(row=4, column=1, padx=(0, 20), pady=2) + + Label_dsLc.grid(row=5, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + dsLc.grid(row=5, column=1, padx=(0, 20), pady=2) + + Label_ssLp.grid(row=2, column=2, sticky=tk.E + tk.W, padx=2, pady=2) + ssLp.grid(row=2, column=3, padx=(0, 20), pady=2) + + Label_ssLc.grid(row=3, column=2, sticky=tk.E + tk.W, padx=2, pady=2) + ssLc.grid(row=3, column=3, padx=(0, 20), pady=2) + + Label_stiffness_ds.grid(row=6, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + stiff_ds.grid(row=6, column=1, padx=(0, 20), pady=2) + + Label_stiffness_ds_up.grid(row=7, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + stiff_ds_up.grid(row=7, column=1, padx=(0, 20), pady=2) + + Label_stiffness_ds_low.grid(row=8, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + stiff_ds_low.grid(row=8, column=1, padx=(0, 20), pady=2) + + Label_stiffness_ss.grid(row=6, column=2, sticky=tk.E + tk.W, padx=2, pady=2) + stiff_ss.grid(row=6, column=3, padx=(0, 20), pady=2) + + Label_f_offset.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + f_off.grid(row=9, column=1, padx=(0, 20), pady=2) + + Label_f_offset_up.grid(row=10, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + f_off_up.grid(row=10, column=1, padx=(0, 20), pady=2) + + Label_f_offset_low.grid(row=11, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + f_off_low.grid(row=11, column=1, padx=(0, 20), pady=2) + + Label_d_offset.grid(row=12, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + d_off.grid(row=12, column=1, padx=(0, 20), pady=2) + + Label_d_offset_up.grid(row=13, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + d_off_up.grid(row=13, column=1, padx=(0, 20), pady=2) + + Label_d_offset_low.grid(row=14, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + d_off_low.grid(row=14, column=1, padx=(0, 20), pady=2) + + """organize tab4""" + # split tab into 2 frames, one for the figure to be displayed and one for the parameters + figure_frame_tab4 = tk.Canvas(tab4, height=650, width=650, borderwidth=1, relief='ridge') + figure_frame_tab4.grid(row=0, column=0) + + parameter_frame_tab4 = tk.Frame(tab4) + parameter_frame_tab4.grid(row=0, column=1, sticky='NE') + + BUTTON1_tab4 = tk.Button( + parameter_frame_tab4, + text='Fit Constant Force Data', + command=start_constantF, 
+ bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 11 bold', + height=1, + width=25 + ) + + BUTTON2_tab4 = tk.Button( + parameter_frame_tab4, + text='Display Constant Force Data', + command=show_constantF, + bg='#df4c4c', + activebackground='#eaa90d', + font='Helvetica 11 bold', + height=1, + width=25 + ) + + BUTTON1_tab4.grid(row=0, column=0, columnspan=2, padx=20, pady=20, sticky='E') + BUTTON2_tab4.grid(row=0, column=2, columnspan=2, padx=20, pady=20, sticky='E') + + # organize settings + Cluster_axes = tk.Label(parameter_frame_tab4, text='SET AXES', font='Helvetica 9 bold') + + Label_x_min = tk.Label(parameter_frame_tab4, text='x min') + x_min = tk.Entry(parameter_frame_tab4) + + Label_x_max = tk.Label(parameter_frame_tab4, text='x max') + x_max = tk.Entry(parameter_frame_tab4) + + Label_y_min = tk.Label(parameter_frame_tab4, text='y min') + y_min = tk.Entry(parameter_frame_tab4) + + Label_y_max = tk.Label(parameter_frame_tab4, text='y max') + y_max = tk.Entry(parameter_frame_tab4) + + Cluster_expected_fit = tk.Label(parameter_frame_tab4, text='EXPECTED VALUES', font='Helvetica 9 bold') + + Label_number_gauss = tk.Label(parameter_frame_tab4, text='Number of expected gaussians') + number_gauss = tk.Entry(parameter_frame_tab4) + + Label_mean_gauss = tk.Label(parameter_frame_tab4, text='Expected mean of each gaussian') + mean_gauss = tk.Entry(parameter_frame_tab4) + + Label_STD_gauss = tk.Label(parameter_frame_tab4, text='Expected SD of each gaussian') + STD_gauss = tk.Entry(parameter_frame_tab4) + + Label_amplitude_gauss = tk.Label(parameter_frame_tab4, text='Expected amplitude of each gaussian') + amplitude_gauss = tk.Entry(parameter_frame_tab4) + + Cluster_axes.grid(row=2, column=0, padx=2, pady=(20, 2)) + Label_x_min.grid(row=3, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + x_min.grid(row=3, column=1, padx=(0, 20), pady=2) + + Label_x_max.grid(row=4, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + x_max.grid(row=4, column=1, padx=(0, 20), pady=2) + + Label_y_min.grid(row=3, column=2, sticky=tk.E + tk.W, padx=2, pady=2) + y_min.grid(row=3, column=3, padx=(0, 20), pady=2) + + Label_y_max.grid(row=4, column=2, sticky=tk.E + tk.W, padx=2, pady=2) + y_max.grid(row=4, column=3, padx=(0, 20), pady=2) + + Cluster_expected_fit.grid(row=5, column=0, padx=2, pady=(20, 2)) + Label_number_gauss.grid(row=6, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + number_gauss.grid(row=6, column=1, sticky=tk.E + tk.W, padx=2, pady=2) + + Label_mean_gauss.grid(row=7, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + mean_gauss.grid(row=7, column=1, sticky=tk.E + tk.W, padx=2, pady=2) + + Label_STD_gauss.grid(row=8, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + STD_gauss.grid(row=8, column=1, sticky=tk.E + tk.W, padx=2, pady=2) + + Label_amplitude_gauss.grid(row=9, column=0, sticky=tk.E + tk.W, padx=2, pady=2) + amplitude_gauss.grid(row=9, column=1, sticky=tk.E + tk.W, padx=2, pady=2) + + # put default values into the widgets + parameters(parameter_frame, default_values_HF, default_values_FIT, default_values_constantF) + + # loop ensuring the GUI is running until closed + root.protocol("WM_DELETE_WINDOW", on_closing) + root.mainloop() diff --git a/POTATO_config.py b/POTATO_config.py new file mode 100644 index 0000000..ba23215 --- /dev/null +++ b/POTATO_config.py @@ -0,0 +1,67 @@ +"""default values for each data type""" + +default_values_HF = { + 'Downsampling rate': '30', + 'Butterworth filter degree': '4', + 'Cut-off frequency': '0.005', + 'Force threshold, pN': '5', + 'Z-score': '3', + 'Step 
d': '10', + 'Moving median window size': '800', + 'STD difference threshold': '0.05', + 'Data frequency, Hz': '1000' +} + +default_values_LF = { + 'Downsampling rate': '1', + 'Butterworth filter degree': '2', + 'Cut-off frequency': '0.5', + 'Force threshold, pN': '5', + 'Z-score': '3', + 'Step d': '3', + 'Moving median window size': '20', + 'STD difference threshold': '0.05', + 'Data frequency, Hz': '1000' +} + +default_values_CSV = { + 'Downsampling rate': '2', + 'Butterworth filter degree': '4', + 'Cut-off frequency': '0.005', + 'Force threshold, pN': '5', + 'Z-score': '3', + 'Step d': '10', + 'Moving median window size': '250', + 'STD difference threshold': '0.05', + 'Data frequency, Hz': '1000' +} + +default_values_FIT = { + 'Persistance-Length ds, nm': '40', + 'Persistance-Length ds, upper bound, nm': '80', + 'Persistance-Length ds, lower bound, nm': '12', + 'Persistance-Length ss, nm': '1', + 'Contour-Length ds, nm': '1256', + 'Contour-Length ss, nm': '0', + 'Stiffness ds, pN': '500', + 'Stiffness ds, upper bound, pN': '600', + 'Stiffness ds, lower bound, pN': '400', + 'Stiffness ss, pN': '800', + 'Force offset, pN': '0', + 'Force offset, upper bound, pN': '3', + 'Force offset, lower bound, pN': '-3', + 'Distance offset, nm': '0', + 'Distance offset, upper bound, nm': '300', + 'Distance offset, lower bound, nm': '-300' +} + +default_values_constantF = { + 'x min': '0', + 'x max': '180', + 'y min': '1290', + 'y max': '1320', + 'Number gauss': '3', + 'Mean': '1295,1310,1317', + 'STD': '1,1,1', + 'Amplitude': '10000,5000,10000' +} diff --git a/POTATO_constantF.py b/POTATO_constantF.py new file mode 100644 index 0000000..9d17b2f --- /dev/null +++ b/POTATO_constantF.py @@ -0,0 +1,221 @@ + +from tkinter import filedialog +import matplotlib.pyplot as plt +import h5py +import numpy as np +import time +import pylab +from scipy.optimize import curve_fit +from scipy.integrate import simps +import pandas as pd + +from POTATO_preprocessing import preprocess_RAW + + +# asks for a constant force file, opens and preprocesses the data +def get_constantF(input_settings, input_format, input_constantF): + global analysis_folder + + # open constant force data in csv or h5 format + file = filedialog.askopenfilename() + # get the directory of the selected file and create a path for the analysis folder + timestamp = time.strftime("%Y%m%d-%H%M%S") + path_split = file.split('/') + directory = '/'.join(path_split[:-1]) + + # check selected input format + if input_format['CSV'] == 1: + with open(file, "r") as f: + df = pd.read_csv(file) + # access the raw data + Force_1x = df.to_numpy()[:, 0] + Distance_1x = df.to_numpy()[:, 1] + # accessing the data frequency from user input + Frequency_value = input_settings['data_frequency'] + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + + filename = path_split[-1][:-4] + analysis_folder = str(directory + '/Analysis_constantF_' + filename + '_' + timestamp) + + else: + with h5py.File(file, "r") as f: + filename = path_split[-1][:-3] + analysis_folder = str(directory + '/Analysis_constantF_' + filename + '_' + timestamp) + + if input_format['HF'] == 1: + # opening file and get the raw h5 values + Force_1x = f.get("Force HF/Force 1x") + Distance_1x = f.get("Distance/Piezo Distance") + # accessing the data frequency from the h5 file + Frequency_value = Force_1x.attrs['Sample rate (Hz)'] + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + + elif input_format['LF'] == 1: + load_force = 
f.get("Force LF/Force 1x") + Force_1x = load_force[:]['Value'][:] + load_distance = f.get("Distance/Distance 1")[:] + Distance_1x = load_distance['Value'][:] + Force_Distance = np.column_stack((Force_1x, Distance_1x)) + + # calculating the data frequency based on start- and end-time of the measurement + size_F_LF = len(Force_1x) + stop_time_F_LF = load_force.attrs['Stop time (ns)'] + start_time_F_LF = load_force.attrs['Start time (ns)'] + Frequency_value = size_F_LF / ((stop_time_F_LF - start_time_F_LF) / 10**9) + + Force_Distance, Force_Distance_um = preprocess_RAW(Force_1x, Distance_1x, input_settings) + + return Force_Distance, Force_Distance_um, Frequency_value, filename, analysis_folder, timestamp + + +# function for a gaussian to fit onto the constant force data +def gauss(x, mu, sigma, A): + return A * pylab.exp(-(x - mu)**2 / 2 / sigma**2) + + +# depending on how many gaussians should be fitted, this function summs them +def sum_gauss(x, *args): + sum = 0 + fit_number = int(len(args) / 3) + for i in range(fit_number): + sum = sum + gauss(x, args[i], args[i + fit_number], args[i + 2 * fit_number]) + return sum + + +# for the fit, values have to be guessed so that the fit can be optimized easily +def expected_modal(input_constantF): + # get the input values for the fit guesses (specified in POTATO_config and the GUI) + mu = input_constantF['Mean'].split(',') + mu_map = map(int, mu) + mu = list(mu_map) + sigma = input_constantF['STD'].split(',') + sigma_map = map(int, sigma) + sigma = list(sigma_map) + A = input_constantF['Amplitude'].split(',') + A_map = map(int, A) + A = list(A_map) + + return mu, sigma, A + + +# display constant force data in the GUI to perform guesses on the fit +def display_constantF(FD, FD_um, frequency, input_settings, input_constantF): + global Figure_constantF + + """set constant axes-values""" + x_min = input_constantF['x min'] + x_max = input_constantF['x max'] + y_min = input_constantF['y min'] + y_max = input_constantF['y max'] + + filteredDistance = FD_um[:, 1] + time_Distance = range(0, len(filteredDistance) * input_settings['downsample_value'], input_settings['downsample_value']) + time_vector_D = range(0, len(filteredDistance) * input_settings['downsample_value'], input_settings['downsample_value']) + + time_D = [] + for t in time_vector_D: + time_D.append(t / int(frequency)) + + Distance_time = [] + for t in time_Distance: + Distance_time.append(t / int(frequency)) + + # create a Figure + Figure_constantF, (ax_scatter, ax_histy) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [3, 1]}, sharey=True) + + ax_scatter.set_xlabel('time, s') + ax_scatter.set_ylabel('Distance, nm') + ax_scatter.tick_params(direction='in', top=True, right=True) + ax_scatter.set_xlim(x_min, x_max) + ax_scatter.set_ylim(y_min, y_max) + + # the scatter plot: + filteredDistance_ready = FD[:, 1] + ax_scatter.plot(time_D, filteredDistance_ready) + + binwidth = 0.1 + bins = np.arange(min(filteredDistance_ready), max(filteredDistance_ready), binwidth) + + ax_histy.tick_params(direction='in', labelleft=False) + ax_histy.set_xlabel('counts') + ax_histy.set_ylim(y_min, y_max) + hist_D = ax_histy.hist(filteredDistance_ready, bins=bins, orientation='horizontal', color='k') + + return Figure_constantF, hist_D, filteredDistance_ready + + +# open constant force data and try to fit gaussians on the distance distribution +def fit_constantF(hist_D, FD, filteredDistance_ready, frequency, input_settings, input_constantF, filename_i, timestamp): + + hist_der_value = [] + hist_der_position = [] + 
+ for i in range(1, len(hist_D[0])): + der = (hist_D[0][i] - hist_D[0][i - 1]) / (hist_D[1][i] - hist_D[1][i - 1]) + hist_der_value.append(der) + hist_der_position.append((hist_D[1][i] + hist_D[1][i - 1]) / 2) + + mu, sig, A = expected_modal(input_constantF) + p0 = np.array([mu, sig, A]) + params, cov = curve_fit(sum_gauss, hist_D[1][0:-1], hist_D[0], p0) + fit_number = int(len(params) / 3) + peak_means = [] + sigmas = [] + peak_amplitudes = [] + G = [] + + for i in range(0, fit_number): + peak_means.append(params[i]) + sigmas.append(params[i + fit_number]) + peak_amplitudes.append(params[i + 2 * fit_number]) + G_x = gauss(hist_D[1][0:-1], params[i], params[i + fit_number], params[i + 2 * fit_number]) + G.append(G_x) + + all_param = peak_means + sigmas + peak_amplitudes + tpl_all_param = tuple(all_param) + Gsum = sum_gauss(hist_D[1][0:-1], *tpl_all_param) + + figure2, subplot2 = plt.subplots(1, 1) + + binwidth = 0.1 + bins = np.arange(min(filteredDistance_ready), max(filteredDistance_ready), binwidth) + subplot2.hist(filteredDistance_ready, bins=bins) + + subplot2.plot(hist_D[1][0:-1], Gsum, 'r') + subplot2.plot(peak_means, peak_amplitudes, color='k', linewidth=0, marker='.', markersize=10) + + Areas = [] + for i in G: + subplot2.plot(hist_D[1][0:-1], i, 'k', linestyle='dashed') + Area_gauss = simps(i, hist_D[1][0:-1]) + Areas.append(Area_gauss) + + fractions = [] + for i in range(len(Areas)): + Area_x = Areas[i] + # Area_rest = sum(Areas[:i]) + sum(Areas[i + 1:]) + Area_all = sum(Areas[:]) + A_fraction = Area_x / Area_all + + fractions.append(A_fraction) + + # export csv containing all the computed values + # total_results_export=pd.DataFrame(zip(X_step_data,F_step_data, F, x_WLC, X, x_FJC), columns=['Distance data','Force with step','Force model','WLC distance','WLC + FJC distance', 'FJC distance']) + smooth_export = pd.DataFrame(zip(FD[:, 0], filteredDistance_ready), columns=['Force [pN]', 'Distance [nm]']) + smooth_export.to_csv((analysis_folder + "/" + filename_i + "_" + timestamp + '_smooth.csv'), index=False, header=True) + + # histogram & gaussians + histogram_export = pd.DataFrame(zip(hist_D[1][0:-1], hist_D[0], Gsum, G), columns=['Distance, nm', 'Counts', 'Gaussian sum', 'Gaussians']) + histogram_export.to_csv((analysis_folder + "/" + filename_i + "_" + timestamp + '_histogram.csv'), index=False, header=True) + + # gaussian parameters + areas + table_export = pd.DataFrame(zip(peak_means, sigmas, peak_amplitudes, Areas, fractions), columns=['mean', 'sigma', 'amplitude', 'Area', 'fraction']) + table_export.to_csv((analysis_folder + "/" + filename_i + "_" + timestamp + '_table.csv'), index=False, header=True) + # plots + plotname2 = analysis_folder + "/" + filename_i + "_histogram_fitted" + ".png" + figure2.savefig(plotname2) + + plotname_figure_constant_f = analysis_folder + "/" + filename_i + "_visualization" + ".png" + Figure_constantF.savefig(plotname_figure_constant_f) + + return figure2 diff --git a/POTATO_find_steps.py b/POTATO_find_steps.py new file mode 100644 index 0000000..a2e7b13 --- /dev/null +++ b/POTATO_find_steps.py @@ -0,0 +1,344 @@ + +from matplotlib.figure import Figure +from matplotlib.lines import Line2D +import numpy as np +import statistics +from scipy.signal import argrelextrema + + +# calculates the standard deviation of a dataset +def STD(input_data, column_number): + dt_STD = statistics.pstdev(input_data[:, column_number]) + return dt_STD + + +# creates a moving median of the dataset, therefore using slices of len(window_size) +def 
+# creates a moving median of the dataset, using slices of len(window_size)
+def moving_median(input_data, column_number, window_size):
+    # window_half so that the moving median depends on values left AND right of the current point
+    window_half = int(window_size / 2)
+    # window_vector covers everything in [window_half:-window_half]
+    window_vector = range(window_half, len(input_data) - window_half, 1)
+    # the edge regions have to be treated differently; everything closer than window_half to an edge gets an edge-window value
+    window_left = range(len(input_data[:window_half]))
+    window_right = range(len(input_data[-window_half:]))
+    mov_med = []
+
+    for n in window_left:
+        mm_left = np.median(input_data[:window_half + n, column_number])
+        mov_med.append(mm_left)
+
+    for n in window_vector:
+        mm = np.median(input_data[n - window_half:n + window_half, column_number])
+        mov_med.append(mm)
+
+    for n in window_right:
+        mm_right = np.median(input_data[-window_half:, column_number])
+        mov_med.append(mm_right)
+
+    return mov_med
+
+
+# sorting the data based on an n_of_std times STD threshold (normally distributed noise vs extreme values from steps)
+def cut_off(input_array, column_number, mm, std, n_of_std):
+    # sort values - inside the STD region, above it and below it
+    F_values_inside = []
+    PD_values_inside = []
+    F_dt_inside = []
+    PD_dt_inside = []
+    inside_indices = []
+
+    F_values_below = []
+    PD_values_below = []
+    F_dt_below = []
+    PD_dt_below = []
+
+    F_values_above = []
+    PD_values_above = []
+    F_dt_above = []
+    PD_dt_above = []
+
+    i = 0
+    for n in range(0, len(input_array), 1):
+        if input_array[n, column_number] > mm[int(i)] + n_of_std * std:
+            F_dt_above.append(input_array[n, 2])
+            F_values_above.append(input_array[n, 0])
+            PD_values_above.append(input_array[n, 1])
+            PD_dt_above.append(input_array[n, 3])
+
+        elif input_array[n, column_number] < mm[int(i)] - n_of_std * std:
+            F_dt_below.append(input_array[n, 2])
+            F_values_below.append(input_array[n, 0])
+            PD_values_below.append(input_array[n, 1])
+            PD_dt_below.append(input_array[n, 3])
+        else:
+            F_dt_inside.append(input_array[n, 2])
+            F_values_inside.append(input_array[n, 0])
+            PD_values_inside.append(input_array[n, 1])
+            PD_dt_inside.append(input_array[n, 3])
+            inside_indices.append(n)
+
+        # rescale n onto the moving median vector, which can be shorter than input_array after iterative refinement
+        i = n * (len(mm) / len(input_array)) - 1
+
+    Above = np.column_stack([F_values_above, PD_values_above, F_dt_above, PD_dt_above])
+    Below = np.column_stack([F_values_below, PD_values_below, F_dt_below, PD_dt_below])
+    Inside = np.column_stack([F_values_inside, PD_values_inside, F_dt_inside, PD_dt_inside])
+
+    return Above, Inside, Below, inside_indices
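+
+
+# Example (illustrative): with z-score = 3, a point whose derivative lies above
+# moving_median + 3 * STD ends up in 'Above', one below moving_median - 3 * STD
+# in 'Below', and everything in between stays in 'Inside'.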
+
+
+# searching for minima in the force derivation to identify unfolding events
+def find_steps_F(input_settings, filename_i, Force_Distance, der_arr):
+    # shared with save_figure
+    global y_vector_F
+    global F_mm2_STD2_positive
+    global F_mm2_STD2_negative
+
+    results_F = []
+    PD_start_F = []
+
+    STD_1 = STD(der_arr, 2)
+    F_mm = moving_median(der_arr, 2, input_settings['window_size'])
+    Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score'])
+
+    F_mm2_STD2_positive = []
+    F_mm2_STD2_negative = []
+    n_runs = 1
+
+    # iterate until the STD converges (relative change between two runs below STD_diff)
+    while abs(STD_1 - STD(Inside, 2)) / STD_1 > input_settings['STD_diff']:
+        F_mm = moving_median(Inside, 2, input_settings['window_size'])
+        STD_1 = STD(Inside, 2)
+        Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score'])
+        n_runs = n_runs + 1
+
+    if STD_1 < 0.05:
+        STD_1 = 0.05
+
+    print('STD is', STD_1)
+
+    Above, Inside, Below, inside_indices_F = cut_off(der_arr, 2, F_mm, STD_1, input_settings['z-score'])
+    F_mm = moving_median(Inside, 2, input_settings['window_size'])
+
+    # fill the gaps of the refined moving median so it matches der_arr in length
+    y_vector_F = []
+    last = 0
+    for n in range(len(der_arr)):
+        if n in inside_indices_F:
+            y_vector_F.append(F_mm[n])
+            last = n
+        else:
+            y_vector_F.append(F_mm[last])
+            F_mm.insert(n, F_mm[last])
+
+    for i in range(len(F_mm)):
+        F_mm2_STD2_positive.append(F_mm[i] + input_settings['z-score'] * STD_1)
+        F_mm2_STD2_negative.append(F_mm[i] - input_settings['z-score'] * STD_1)
+
+    # find the step points
+    # for those steps that cross the z-score * STD threshold -> find the closest crossing points before/after
+
+    # for local minima
+    loc_min = argrelextrema((Below[:, 2]), np.less)
+
+    n_steps = 1
+
+    for k in loc_min[0]:
+        F_dt_loc_min = Below[k, 2]
+        F_index = np.where(der_arr[:, 2] == F_dt_loc_min)
+
+        # find start and end of the step
+        i_start = F_index[0][0]
+        i_end = F_index[0][0]
+        while der_arr[i_start, 2] < F_mm[int(i_start * len(F_mm) / len(der_arr))] and der_arr[i_start, 2] < der_arr[i_start - 1, 2] and i_start >= 1:
+            i_start = i_start - 1
+        if i_start == 0:
+            i_start = 1
+        while der_arr[i_end, 2] < F_mm[int(i_end * len(F_mm) / len(der_arr))] and der_arr[i_end, 2] < der_arr[i_end + 1, 2] and i_end < (len(der_arr) - 2):
+            i_end = i_end + 1
+
+        PD_start_F.append(der_arr[i_start, 1])
+        dict1 = {
+            "filename": filename_i,
+            "Derivation of": 'Force',
+            'step #': n_steps,
+            'F1': der_arr[i_start, 0],
+            'F2': der_arr[i_end, 0],
+            'Fc': (der_arr[i_start, 0] + der_arr[i_end, 0]) / 2,
+            'step start': der_arr[i_start, 1],
+            'step end': der_arr[i_end, 1],
+            'step length': der_arr[i_end, 1] - der_arr[i_start, 1],
+        }
+
+        results_F.append(dict1)
+
+        n_steps = n_steps + 1
+
+    return results_F, PD_start_F
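+
+
+# find_steps_PD below mirrors find_steps_F, but works on the distance
+# derivative (column 3 of the derivation array) and therefore searches for
+# local maxima in 'Above' instead of local minima in 'Below'.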
+
+
+# searching for maxima in the distance derivation to identify unfolding events
+def find_steps_PD(input_settings, filename_i, Force_Distance, der_arr):
+    # shared with save_figure
+    global y_vector_PD
+    global PD_mm2_STD2_positive
+    global PD_mm2_STD2_negative
+
+    results_PD = []
+    PD_start_PD = []
+
+    STD_1 = STD(der_arr, 3)
+    PD_mm = moving_median(der_arr, 3, input_settings['window_size'])
+
+    Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score'])
+
+    PD_mm2_STD2_positive = []
+    PD_mm2_STD2_negative = []
+
+    n_runs = 1
+    # iterate until the STD converges (relative change between two runs below STD_diff)
+    while abs(STD_1 - STD(Inside, 3)) / STD_1 > input_settings['STD_diff']:
+        PD_mm = moving_median(Inside, 3, input_settings['window_size'])
+        STD_1 = STD(Inside, 3)
+        Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score'])
+        n_runs = n_runs + 1
+
+    if STD_1 < 0.05:
+        STD_1 = 0.05
+
+    print('STD is', STD_1)
+
+    Above, Inside, Below, inside_indices_PD = cut_off(der_arr, 3, PD_mm, STD_1, input_settings['z-score'])
+    PD_mm = moving_median(Inside, 3, input_settings['window_size'])
+
+    # fill the gaps of the refined moving median so it matches der_arr in length
+    y_vector_PD = []
+    last = 0
+    for n in range(len(der_arr)):
+        if n in inside_indices_PD:
+            y_vector_PD.append(PD_mm[n])
+            last = n
+        else:
+            y_vector_PD.append(PD_mm[last])
+            PD_mm.insert(n, PD_mm[last])
+
+    for i in range(len(PD_mm)):
+        PD_mm2_STD2_positive.append(PD_mm[i] + input_settings['z-score'] * STD_1)
+        PD_mm2_STD2_negative.append(PD_mm[i] - input_settings['z-score'] * STD_1)
+
+    # find the step points
+    # for those steps that cross the z-score * STD threshold -> find the closest crossing points before/after
+
+    # for local maxima
+    loc_max = argrelextrema(Above[:, 3], np.greater)
+
+    n_steps = 1
+
+    for k in loc_max[0]:
+        PD_dt_loc_max = Above[k, 3]
+        PD_index = np.where(der_arr[:, 3] == PD_dt_loc_max)
+
+        # find start and end of the step
+        i_start = PD_index[0][0]
+        i_end = PD_index[0][0]
+
+        while der_arr[i_start, 3] > PD_mm[int(i_start * len(PD_mm) / len(der_arr))] and der_arr[i_start - 1, 3] < der_arr[i_start, 3] and i_start >= 1:
+            i_start = i_start - 1
+        if i_start == 0:
+            i_start = 1
+
+        while der_arr[i_end, 3] > PD_mm[int(i_end * len(PD_mm) / len(der_arr))] and der_arr[i_end, 3] > der_arr[i_end + 1, 3] and i_end < (len(der_arr) - 2):
+            i_end = i_end + 1
+
+        PD_start_PD.append(der_arr[i_start, 1])
+
+        dict1 = {
+            "filename": filename_i,
+            "Derivation of": 'Distance',
+            'step #': n_steps,
+            'F1': der_arr[i_start, 0],
+            'F2': der_arr[i_end, 0],
+            'Fc': (der_arr[i_start, 0] + der_arr[i_end, 0]) / 2,
+            'step start': der_arr[i_start, 1],
+            'step end': der_arr[i_end, 1],
+            'step length': der_arr[i_end, 1] - der_arr[i_start, 1],
+        }
+
+        results_PD.append(dict1)
+
+        n_steps = n_steps + 1
+
+    return results_PD, PD_start_PD
+
+
+# identify steps found by BOTH the force and the distance derivation (these are used for fitting afterwards)
+def find_common_steps(F_steps, PD_steps):
+    common_steps = []
+    x = 1
+
+    for n in range(0, len(F_steps)):
+        F_steps_dict = F_steps[n]
+        step_F_middle = (float(F_steps_dict['step start']) + float(F_steps_dict['step end'])) / 2
+        for i in range(0, len(PD_steps)):
+            PD_steps_dict = PD_steps[i]
+
+            # a force step counts as common if its middle lies within a distance step
+            if step_F_middle > PD_steps_dict['step start'] and step_F_middle < PD_steps_dict['step end']:
+                new_steps = PD_steps[i]
+                new_step_number = {'step #': x}
+                new_steps.update(new_step_number)
+                common_steps.append(new_steps)
+                x += 1
+
+    return common_steps
+
+
+def calc_integral(area_1, area_2, step_start_d, step_end_d, step_start_f, step_end_f):
+    # estimate the work as: area under the fit up to the step start, plus the
+    # trapezoid spanning the step, minus the area under the fit up to the step end
+    work_step = area_1 + ((step_end_d - step_start_d) * 0.5 * (step_start_f + step_end_f)) - area_2
+    work_in_kT = work_step / 4.114  # 4.114 pN*nm is the approximate value of kT (Boltzmann constant times temperature) at 298 K
+
+    return work_step, work_in_kT
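+
+
+# Worked example (illustrative numbers): for a step from (d = 320 nm, F = 12 pN)
+# to (d = 335 nm, F = 10 pN), with area_1 = 3000 pN*nm (fit integral up to the
+# step start) and area_2 = 2800 pN*nm (fit integral up to the step end):
+#   work_step = 3000 + (15 * 0.5 * 22) - 2800 = 365 pN*nm
+#   work_in_kT = 365 / 4.114 = approx. 88.7 kT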
subplot4.set_title("Force derivation") + subplot4.plot(derivation_array[:, 1], derivation_array[:, 2]) + + subplot4.plot(derivation_array[:, 1], y_vector_F) + subplot4.fill_between(derivation_array[:, 1], list(F_mm2_STD2_positive), list(F_mm2_STD2_negative), color='black', alpha=0.30) + + if export_PLOT == 1: + plotname = analysis_folder + "/" + filename_i + "_plot_" + timestamp + ".png" + figure1.savefig(plotname, dpi=600) + else: + pass + + figure1.clf() diff --git a/POTATO_fitting.py b/POTATO_fitting.py new file mode 100644 index 0000000..93e2b2e --- /dev/null +++ b/POTATO_fitting.py @@ -0,0 +1,234 @@ +import matplotlib.pyplot as plt +import lumicks.pylake as lk +import numpy as np +from scipy.integrate import simps +from matplotlib.lines import Line2D + + +"""define the functions used for fitting""" + + +def fitting_ds(filename_i, input_settings, export_data, input_fitting, i_start, Force_Distance, derivation_array, F_low): + global model_ds, fit_ds + global ds_fit_dict + global f_fitting_region_ds, d_fitting_region_ds + global export_fit_ds + global fitting_model + + start_step1 = np.where(derivation_array[:, 1] == i_start) + start_step1 = start_step1[0][0] + + f_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 0] + d_fitting_region_ds = Force_Distance[0:start_step1 * input_settings['step_d'] + len(F_low), 1] + + model_ds = lk.inverted_odijk("ds_part").subtract_independent_offset() + lk.force_offset("ds_part") + + fit_ds = lk.FdFit(model_ds) + + fit_ds.add_data("Double stranded", f_fitting_region_ds, d_fitting_region_ds) + # Persistance length bounds + fit_ds["ds_part/Lp"].value = input_fitting['lp_ds'] + fit_ds["ds_part/Lp"].upper_bound = input_fitting['lp_ds_up'] + fit_ds["ds_part/Lp"].lower_bound = input_fitting['lp_ds_low'] + + # Force shift bounds + fit_ds["ds_part/f_offset"].value = input_fitting['offset_f'] + fit_ds["ds_part/f_offset"].upper_bound = input_fitting['offset_f_up'] + fit_ds["ds_part/f_offset"].lower_bound = input_fitting['offset_f_low'] + + # distance shift bounds + fit_ds["ds_part/d_offset"].value = input_fitting['offset_d'] + fit_ds["ds_part/d_offset"].upper_bound = input_fitting['offset_d_up'] + fit_ds["ds_part/d_offset"].lower_bound = input_fitting['offset_d_low'] + + # stiffnes + fit_ds["ds_part/St"].value = input_fitting['ds_stiff'] + fit_ds["ds_part/St"].upper_bound = input_fitting['ds_stiff_up'] + fit_ds["ds_part/St"].lower_bound = input_fitting['ds_stiff_low'] + + # contour length + Lc_initial_guess = input_fitting['lc_ds'] # nm + Lc_range = 5 + fit_ds["ds_part/Lc"].upper_bound = Lc_initial_guess + Lc_range + fit_ds["ds_part/Lc"].lower_bound = Lc_initial_guess - Lc_range + fit_ds["ds_part/Lc"].value = Lc_initial_guess + fit_ds["ds_part/Lc"].unit = 'nm' + + fit_ds.fit() + fit_qual = fit_ds.log_likelihood() + print(fit_ds.params) + + # calculate the integral until the first unfolding step + # used to calculate the work done by the machine + distance_integral = np.arange(min(Force_Distance[:, 1]), i_start) + ds_integral = model_ds(distance_integral, fit_ds) + area_ds = simps(ds_integral) + print("area_ds = " + str(area_ds)) + + # export the fitting parameters + ds_fit_dict = { + 'filename': filename_i, + 'model': 'WLC', + 'log_likelihood': fit_qual, + 'Lc_ds': fit_ds["ds_part/Lc"].value, + 'Lp_ds': fit_ds["ds_part/Lp"].value, + 'Lp_ds_stderr': fit_ds["ds_part/Lp"].stderr, + 'St_ds': fit_ds["ds_part/St"].value, + 'f_offset': fit_ds["ds_part/f_offset"].value, + 'd_offset': fit_ds["ds_part/d_offset"].value + } + + return 
+
+
+def fitting_ss(filename_i, input_settings, export_data, input_fitting, i_start, i_end, Force_Distance, fix, max_range, derivation_array, F_low):
+    global model_ss
+    global ss_fit_dict
+
+    start_fitting_region = np.where(derivation_array[:, 1] == i_start)
+    end_fitting_region = np.where(derivation_array[:, 1] == i_end)
+    start_fitting_region = start_fitting_region[0][0]
+    end_fitting_region = end_fitting_region[0][0]
+
+    raw_f_fitting_region = Force_Distance[start_fitting_region * input_settings['step_d'] + len(F_low):end_fitting_region * input_settings['step_d'] + len(F_low), 0]
+    raw_d_fitting_region = Force_Distance[start_fitting_region * input_settings['step_d'] + len(F_low):end_fitting_region * input_settings['step_d'] + len(F_low), 1]
+
+    # downsample the data used for fitting to roughly 200 datapoints
+    if len(raw_f_fitting_region) > 200:
+        f_fitting_region_ss = raw_f_fitting_region[::int(len(raw_f_fitting_region) / 200)]
+        d_fitting_region_ss = raw_d_fitting_region[::int(len(raw_f_fitting_region) / 200)]
+    else:
+        f_fitting_region_ss = raw_f_fitting_region
+        d_fitting_region_ss = raw_d_fitting_region
+
+    if input_fitting['WLC+FJC'] == 1:
+        model_ss = lk.odijk("DNA_2") + lk.freely_jointed_chain("RNA")
+    elif input_fitting['WLC+WLC'] == 1:
+        model_ss = lk.odijk("DNA_2") + lk.odijk("RNA")
+
+    model_ss = model_ss.invert().subtract_independent_offset() + lk.force_offset("DNA")
+    fit_ss = lk.FdFit(model_ss)
+
+    fit_ss.add_data("ss_part", f_fitting_region_ss, d_fitting_region_ss)
+
+    # ds part parameters, constrained around the fitting_ds results (within max_range %)
+    # persistence length bounds
+    # Lp_ds_range=fit_ds["DNA/Lp"].value/10
+    fit_ss["DNA_2/Lp"].value = ds_fit_dict['Lp_ds']
+    fit_ss["DNA_2/Lp"].upper_bound = ds_fit_dict['Lp_ds'] * (1 + max_range / 100)
+    fit_ss["DNA_2/Lp"].lower_bound = ds_fit_dict['Lp_ds'] * (1 - max_range / 100)
+    # if fix==1:
+    fit_ss["DNA_2/Lp"].fixed = True  # boolean, not the string 'True'
+
+    fit_ss["DNA/f_offset"].upper_bound = 5
+    fit_ss["DNA/f_offset"].lower_bound = -5
+    fit_ss["DNA/f_offset"].value = ds_fit_dict['f_offset']
+    fit_ss["DNA/f_offset"].fixed = True
+
+    fit_ss["inv(DNA_2_with_RNA)/d_offset"].value = ds_fit_dict['d_offset']
+    fit_ss["inv(DNA_2_with_RNA)/d_offset"].fixed = True
+
+    # contour length bounds
+    # Lc_ds_range=Lc_initial_guess/100 # nm
+    fit_ss["DNA_2/Lc"].upper_bound = ds_fit_dict['Lc_ds'] * (1 + max_range / 100)
+    fit_ss["DNA_2/Lc"].lower_bound = ds_fit_dict['Lc_ds'] * (1 - max_range / 100)
+    fit_ss["DNA_2/Lc"].value = ds_fit_dict['Lc_ds']
+    fit_ss["DNA_2/Lc"].unit = 'nm'
+    # if fix==1:
+    fit_ss["DNA_2/Lc"].fixed = True
+
+    # stiffness
+    fit_ss["DNA_2/St"].upper_bound = ds_fit_dict['St_ds'] * (1 + max_range / 100)
+    fit_ss["DNA_2/St"].lower_bound = ds_fit_dict['St_ds'] * (1 - max_range / 100)
+    fit_ss["DNA_2/St"].value = ds_fit_dict['St_ds']
+    if fix == 1:
+        fit_ss["DNA_2/St"].fixed = True
+
+    # ss part parameters
+
+    # persistence length bounds
+    fit_ss["RNA/Lp"].value = input_fitting['lp_ss']
+    fit_ss["RNA/Lp"].lower_bound = 0.8
+    fit_ss["RNA/Lp"].upper_bound = 2
+    if fix == 1:
+        fit_ss["RNA/Lp"].fixed = True
+
+    # stiffness
+    fit_ss["RNA/St"].value = input_fitting['ss_stiff']
+    fit_ss["RNA/St"].lower_bound = 300
+    fit_ss["RNA/St"].upper_bound = 1500
+
+    # contour length bounds
+    fit_ss["RNA/Lc"].value = input_fitting['lc_ss']
+    fit_ss["RNA/Lc"].lower_bound = 0
+    fit_ss["RNA/Lc"].upper_bound = input_fitting['lc_ss'] + 100
+
+    fit_ss["RNA/Lc"].unit = 'nm'
+
+    fit_ss.fit()
+    print(fit_ss.params)
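+
+    # (the ds-part parameters above were constrained around the fitting_ds
+    # results, so this fit mainly optimizes the ss-part parameters Lp, Lc and St)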
+
+    # calculate the integrals of the fitted functions
+    distance_integral_fit_start = np.arange(min(Force_Distance[:, 1]), i_start)
+    ss_integral_start = model_ss(distance_integral_fit_start, fit_ss)
+    area_ss_fit_start = simps(ss_integral_start)
+    print("area_ss_start = " + str(area_ss_fit_start))
+
+    distance_integral_fit_end = np.arange(min(Force_Distance[:, 1]), i_end)
+    ss_integral_end = model_ss(distance_integral_fit_end, fit_ss)
+    area_ss_fit_end = simps(ss_integral_end)
+    print("area_ss_end = " + str(area_ss_fit_end))
+
+    fit_qual = fit_ss.log_likelihood()
+
+    if input_fitting["WLC+WLC"] == 1:
+        fitting_model = "WLC+WLC"
+    elif input_fitting["WLC+FJC"] == 1:
+        fitting_model = "WLC+FJC"
+
+    ss_fit_dict = {
+        'filename': filename_i,
+        'model': fitting_model,
+        'log_likelihood': fit_qual,
+        'Lc_ds': fit_ss["DNA_2/Lc"].value,
+        'Lp_ds': fit_ss["DNA_2/Lp"].value,
+        'St_ds': fit_ss["DNA_2/St"].value,
+        'Lc_ss': fit_ss["RNA/Lc"].value,
+        'Lc_ss_stderr': fit_ss["RNA/Lc"].stderr,
+        'Lp_ss': fit_ss["RNA/Lp"].value,
+        'St_ss': fit_ss["RNA/St"].value,
+        'f_offset': fit_ss["DNA/f_offset"].value,
+        'd_offset': fit_ss["inv(DNA_2_with_RNA)/d_offset"].value
+    }
+
+    return fit_ss, f_fitting_region_ss, d_fitting_region_ss, ss_fit_dict, area_ss_fit_start, area_ss_fit_end
+
+
+def plot_fit(fit, start_force_ss, start_distance_ss, Force_Distance, save_folder, filename_i, start_time):
+    # uses the module-level model_ds / fit_ds and fitting regions set by fitting_ds
+    distance = np.arange(min(Force_Distance[:, 1]), max(Force_Distance[:, 1]) + 50, 2)
+    F_ds_model = model_ds(distance, fit_ds)
+
+    legend_elements = [
+        Line2D([0], [0], color='k', lw=1, alpha=0.85),
+        Line2D([0], [0], color='r', lw=1),
+        Line2D([0], [0], color='gray', linestyle='dashed', lw=1)
+    ]
+
+    plt.plot(Force_Distance[:, 1], Force_Distance[:, 0], 'k', alpha=0.85)
+    plt.scatter(d_fitting_region_ds, f_fitting_region_ds, color='r', s=4)
+    plt.plot(distance, F_ds_model, linestyle='dashed', color='gray')
+    plt.ylabel("Force [pN]")
+    plt.xlabel("Distance [nm]")
+    plt.legend(legend_elements, ['FD-Curve', 'Part used for fitting', 'Fitted WLC model'])
+
+    # overlay each ss fit (one per common step)
+    for i in range(0, len(fit)):
+        F_ss_model = model_ss(distance, fit[i])
+        plt.scatter(start_distance_ss[i], start_force_ss[i], s=4)
+        plt.plot(distance, F_ss_model, linestyle='dashed', color='gray')
+
+    plotname = save_folder + "/" + filename_i + "_fit_" + start_time + ".png"
+
+    plt.savefig(plotname, dpi=600)
+    plt.clf()
diff --git a/POTATO_preprocessing.py b/POTATO_preprocessing.py
new file mode 100644
index 0000000..06bd8f6
--- /dev/null
+++ b/POTATO_preprocessing.py
@@ -0,0 +1,86 @@
+
+from scipy import signal
+import numpy as np
+
+
+def preprocess_RAW(Force, Distance, input_settings):
+    # downsample
+    Force_ds = Force[::input_settings['downsample_value']]
+    Distance_ds = Distance[::input_settings['downsample_value']]
+
+    # low-pass Butterworth filter, applied forward and backward (zero phase lag)
+    b, a = signal.butter(input_settings['filter_degree'], input_settings['filter_cut_off'])
+    filteredForce = signal.filtfilt(b, a, Force_ds)
+    filteredDistance = signal.filtfilt(b, a, Distance_ds)
+    filteredDistance_ready = filteredDistance * 1000  # um -> nm
+
+    Force_Distance = np.column_stack((filteredForce, filteredDistance_ready))
+
+    if Force_Distance[0, 1] > Force_Distance[-1, 1]:  # reverse, if the distance decreases over the recording
+        Force_Distance = np.flipud(Force_Distance)
+
+    Force_Distance_um = np.column_stack((filteredForce, filteredDistance))
+
+    return Force_Distance, Force_Distance_um
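+
+
+# Example (illustrative): with filter_degree = 4 and filter_cut_off = 0.005,
+# signal.butter returns a low-pass filter whose cutoff is given as a fraction
+# of the Nyquist frequency; signal.filtfilt then smooths the trace without
+# shifting it in time.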
+
+
+# creates a dataset from the min force threshold to the max force value
+def trim_data(FD_data, F_min):
+    # shared with create_derivation and the fitting functions
+    global F_trimmed, PD_trimmed, F_low
+
+    F_trimmed = []
+    PD_trimmed = []
+
+    F_max = np.where(FD_data[:, 0] == max(FD_data[:, 0]))
+    fi = F_max[0][0]
+
+    # walk back from the force maximum past any trailing decline in force and
+    # distance, so fi0 marks the end of the monotonic pull
+    while FD_data[fi, 0] < FD_data[fi - 10, 0]:
+        fi = fi - 1
+
+    while FD_data[fi, 1] < FD_data[fi - 10, 1]:
+        fi = fi - 1
+    fi0 = fi
+
+    fi = F_max[0][0]
+
+    # walk back until the force drops below the threshold F_min
+    while FD_data[fi, 0] > F_min:
+        fi = fi - 1
+
+    F_trimmed = FD_data[fi:fi0, 0]
+    PD_trimmed = FD_data[fi:fi0, 1]
+    F_low = FD_data[:fi, 0]
+
+    return F_trimmed, PD_trimmed, F_low
+
+
+# creates derivatives of force and distance of the trimmed dataset
+def create_derivation(input_settings, Frequency_value):
+    # uses the module-level F_trimmed / PD_trimmed set by trim_data
+    d_time = 1 / Frequency_value * input_settings['downsample_value'] * input_settings['step_d']
+
+    x = input_settings['step_d']
+
+    derivation_list = []
+
+    # the sign of the derivative flips when the curve was recorded with decreasing distance
+    direction = 1 if PD_trimmed[0] < PD_trimmed[-1] else -1
+
+    while x < len(F_trimmed):
+        F_value = (F_trimmed[x] + F_trimmed[x - input_settings['step_d']]) / 2
+        PD_value = (PD_trimmed[x] + PD_trimmed[x - input_settings['step_d']]) / 2
+        delta_PD = PD_trimmed[x] - PD_trimmed[x - input_settings['step_d']]
+        delta_F = F_trimmed[x] - F_trimmed[x - input_settings['step_d']]
+        F_dt = direction * delta_F / d_time
+        PD_dt = direction * delta_PD / d_time
+
+        derivation_list.append([F_value, PD_value, F_dt, PD_dt])
+        x = x + input_settings['step_d']
+
+    derivation_array = np.array(derivation_list)
+
+    return derivation_array
diff --git a/POTATO_readme.txt b/POTATO_readme.txt
new file mode 100644
index 0000000..bce00df
--- /dev/null
+++ b/POTATO_readme.txt
@@ -0,0 +1,156 @@
+*************** README - POTATO ***************
+This is the first version of POTATO and the pipeline is still in development.
+Therefore, there may still be some unresolved issues.
+The code is available on Github: https://github.com/lpekarek/POTATO
+If you encounter an issue, feel free to contact us at REMI@helmholtz-hiri.de
+
+POTATO -- 2021-02-22 -- Version 0.1
+    Developed by Lukáš Pekárek and Stefan Buck at the Helmholtz Institute for RNA-based Infection Research
+    in the research group REMI - Recoding Mechanisms in Infections.
+    Supervisor - Jun. Prof. Neva Caliskan
+
+    This script processes Force-Distance Optical Tweezers data in an automated way to find unfolding events.
+    The script is developed to handle h5 raw data produced by the C-Trap OT instrument from LUMICKS,
+    as well as any other FD data prepared in a CSV file (2 columns: Force (pN) - Distance (um)).
+    Furthermore, the script can analyse single constant force markers.
+    The parameters can be changed in the GUI before each run.
+    Alternatively, they can be changed permanently in the POTATO_config file.
+
+
+***Dependencies***
+Python3
+Packages:
+    csv,
+    glob,
+    h5py,
+    json,
+    lumicks.pylake,
+    matplotlib,
+    multiprocessing,
+    numpy,
+    os,
+    pandas,
+    pathlib,
+    PIL,
+    scipy,
+    statistics,
+    time,
+    tkinter
+
+There is also a standalone executable POTATO version for Windows available.
+
+
+***Navigation***
+The POTATO GUI is structured into four tabs: "Folder Analysis", "Show Single File", "Constant Force Analysis" and "Advanced Settings".
+For each analysis step, buttons are displayed in the different tabs; some of them can also be found in the drop-down menu.
+
+***Folder Analysis***
+**Input**
+    POTATO supports three different input formats for folder analysis.
+    The appropriate dataset has to be selected prior to the analysis.
+    ATTENTION: When changing between input formats, all parameters are reset to the default values!
+
+1) High frequency (Piezo Distance):
+    Analyses the high frequency data (Piezo tracking) of all h5 files in a given directory.
+    The data gathering frequency is derived directly from the h5 files.
+2) Low Frequency
+    Analyses the low frequency data of all h5 files in a given directory.
+    The data gathering frequency is calculated directly from the h5 files.
+3) CSV (F/D)
+    Analyses all csv files in a given directory.
+    These files must consist of two columns (Force and Distance) without headers, as in the example below.
+    The data gathering frequency and all other parameters are derived from the user input in the GUI.
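+
+    Example CSV content (illustrative values; force in pN, distance in um):
+        10.52,1.502
+        10.61,1.503
+        10.57,1.504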
+
+**Parameters**
+    Parameters can be changed directly in the Graphical User Interface (GUI).
+    For more setting options, refer to the "Advanced Settings" tab, which includes all adjustable parameters.
+    After changing a parameter, it needs to be confirmed with the Enter key.
+    Once the parameters are optimized, the defaults can be changed in the POTATO_config file,
+    so they will be loaded when the GUI is started.
+
+**Output**
+    POTATO creates an "Analysis" folder with a timestamp in the analysed directory.
+    The "Refresh" button loads the last saved image and gives a progress report.
+    In the "Advanced Settings" tab, several export settings can be set.
+
+1) Processed FD data:
+    Exports the down-sampled and filtered Force-Distance array in CSV format.
+    The exported filename consists of the original filename and the additional suffix "_smooth".
+2) Plot
+    Exports a figure (PNG) containing - the visualized processed data with and without marked unfolding events
+                                      - the corresponding force and distance derivations
+3) Steps found
+    Exports the identified steps for each curve into a separate CSV file.
+    The step length does NOT correspond to the contour length change, but is only the distance difference between step start and step end.
+4) Total results
+    Exports all steps from both derivations in CSV format.
+5) Fitting
+    Exports a plot with the fitted models and a table of the fitting parameters for each section in CSV format.
+    When 'Fitting' is not selected, the script skips all fitting steps, which makes the analysis much faster.
+
+
+***Show Single File***
+**Input**
+    Single FD-curves in all three input formats (h5-HF, h5-LF, CSV) can be displayed.
+
+**Output**
+    An FD-curve of the original input values, as well as of the down-sampled values, is plotted in the GUI.
+    This may help identify potential causes of errors.
+
+
+***Constant Force Analysis***
+**Input**
+    First, the constant force marker should be displayed in a time-distance plot with an adjacent histogram (Button -> Display Constant Force Data).
+    All three input formats (h5-HF, h5-LF, CSV) are supported.
+    After providing an initial guess of the fitting parameters, the histogram of the same file can be fitted with gaussians (Button -> Fit Constant Force Data).
+
+**Parameters**
+    The down-sampling and filtering parameters are also applied to the constant force data.
+    This can have a huge impact on the displayed plot.
+    Furthermore, the axes have to be set manually and the fitting parameters have to be guessed (depending on the number of expected gaussians).
+    To guess the fitting parameters, they are entered from lowest to highest value, separated by commas, as in the example below.
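+
+    Example (illustrative values): for two expected gaussians, one could enter
+        Mean: 360,380    STD: 5,5    Amplitude: 100,80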
+
+**Output**
+    POTATO creates an "Analysis_constantF" folder with a timestamp in the analysed directory.
+    Both the time-distance plot and the fitted histogram are exported as separate figures in PNG format.
+    In addition, the smoothed values as well as the histogram distribution are exported into CSV files.
+    Finally, a table with the values of the fitted parameters is exported in CSV format.
+
+
+***Advanced Settings***
+    This tab contains all the adjustable parameters of POTATO.
+    The parameters are divided into several groups based on the part of the analysis in which they are used.
+**Preprocessing**
+    Downsample rate - Only every nth value is taken for analysis; speeds up subsequent processing.
+    Butterworth Filter degree - Defines the stringency of the filter.
+    Cut off frequency - Signals with a frequency above this threshold are suppressed.
+    Force threshold, pN - Values with lower force are excluded from the analysis.
+**Derivation**
+    Step d - The interval between the two values used for each numerical derivative calculation.
+    Data frequency, Hz - The frequency at which data is recorded.
+**Statistics**
+    z-score - The number of standard deviations used to determine whether a given value is part of a normal distribution.
+    Moving median window size - The number of values considered for each median calculation.
+    SD difference threshold - Statistical analysis and data sorting are iterated until the difference between two consecutive SDs is below this value.
+**Fitting**
+    "WLC+WLC" or "WLC+FJC" tick option - Determines whether the unfolded regions of FD curves are fitted with a model combining two WLC models, or a WLC and an FJC model, respectively.
+    dsLp, nm - Persistence length of the double-stranded (folded) part of the tethered construct.
+    dsLc, nm - Contour length of the double-stranded (folded) part of the tethered construct.
+    dsK0, pN - Stretch modulus of the double-stranded (folded) part of the tethered construct.
+    Force_offset, pN - Force offset of a given dataset; compensates for a shift in the dataset.
+    Distance_offset, nm - Distance offset of a given dataset; compensates for a shift in the dataset.
+    ssLp, nm - Persistence length of the single-stranded (unfolded) part of the tethered construct.
+    ssLc, nm - Contour length of the single-stranded (unfolded) part of the tethered construct.
+    ssK0, pN - Stretch modulus of the single-stranded (unfolded) part of the tethered construct.
+
+**Export**
+    Consists of tick options marking whether each data file is exported (ticked) or not (unticked) during the analysis.
+    Processed FD data - Exports the down-sampled and filtered Force-Distance array in CSV format.
+        The exported filename consists of the original filename and the additional suffix "_smooth".
+    Plot - Exports a figure (PNG) containing - the visualized processed data with and without marked unfolding events
+                                             - the corresponding force and distance derivations
+    Steps found - Exports the identified steps for each curve into a separate CSV file.
+        The step length does NOT correspond to the contour length change, but is only the distance difference between step start and step end.
+    Total results - Exports all steps from both derivations in CSV format.
+    Fitting - Exports a plot with the fitted models and a table of the fitting parameters for each section in CSV format.
+        When 'Fitting' is not selected, the script skips all fitting steps, which makes the analysis much faster.