From 52957755dd4f008df8b7e70843219d9c99425ef9 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 13:28:20 +0200 Subject: [PATCH 01/24] Improve cache_labeled_file() docstrings --- zeek-files-labeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 88d3891..b26f825 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -422,7 +422,9 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): def cache_labeled_file(): """ - Read the labeled file and store the uid and labels in a dictionary + Read the labeled file and store the uid and labels in a dictionary. + - Input: global variable 'args.labeledfile' + - Output: labels_dict """ try: if args.verbose > 0: @@ -753,8 +755,6 @@ def process_zeekfolder(): # Close the outputfile output_file.close() - #print('Amount of lines read: {0}'.format(amount_lines_processed)) - except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno print(f'Problem in process_zeekfolder() line {exception_line}', 0, 1) From fc4410c93c0f6b04f4d242fb5daf52db80e4f098 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:23:16 +0200 Subject: [PATCH 02/24] Improve define_type docstrings --- zeek-files-labeler.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index b26f825..b006700 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -252,10 +252,13 @@ def define_columns(headerline, filetype): def define_type(data): """ - Try to define very fast the type of input from :Zeek file, Suricata json, Argus binetflow CSV, Argus binetflow TSV - Using a Heuristic detection - Input: The first line after the headers if there were some, as 'data' - Outputs types can be can be: zeek-json, suricata, argus-tab, argus-csv, zeek-tab + Using heuristic detection, quickly determine the input type from the following options: + Zeek file, Suricata JSON, Argus binetflow CSV, or Argus binetflow TSV. + - Input: The first line after the headers if there were some, as 'data' + - Outputs types can be can be: zeek-json, suricata, argus-tab, argus-csv, zeek-tab + If input is JSON, it can be Zeek or Suricata + If input is CSV, it can be Argus + If input is TSV, it can be Argus or zeek """ try: # If line json, it can be Zeek or suricata From bbd5e871442979e817fc1b678f6c3078ca7aaca7 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:24:22 +0200 Subject: [PATCH 03/24] Improve comments on define_type() --- zeek-files-labeler.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index b006700..2c57524 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -260,31 +260,27 @@ def define_type(data): If input is CSV, it can be Argus If input is TSV, it can be Argus or zeek """ + input_type = 'unknown' try: - # If line json, it can be Zeek or suricata - # If line CSV, it can be Argus - # If line TSV, it can be Argus or zeek - - input_type = 'unknown' - - # Is it json? + # Validate if input is JSON try: json_line = json.loads(data) - # json + + # Determine if logs are Zeek or Suricata try: - # Zeek? + # Validate if input are Zeek JSON logs _ = json_line['ts'] input_type = 'zeek-json' return input_type except KeyError: - # Suricata? + # Validate if input are Suricata JSON logs? _ = json_line['timestamp'] input_type = 'suricata-json' return input_type + # Validate if input is CSV or TSV except json.JSONDecodeError: - # No json + # Validate if input is text based if type(data) == str: - # string nr_commas = len(data.split(',')) nr_tabs = len(data.split(' ')) if nr_commas > nr_tabs: @@ -307,6 +303,7 @@ def define_type(data): input_type = 'nfdump-tab' return input_type + # Returned guessed input log type except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno From 02ea9ceca9289f21baa613ccc384e41f94bedc8e Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:25:02 +0200 Subject: [PATCH 04/24] Return input_type for all cases --- zeek-files-labeler.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 2c57524..c778e95 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -271,12 +271,10 @@ def define_type(data): # Validate if input are Zeek JSON logs _ = json_line['ts'] input_type = 'zeek-json' - return input_type except KeyError: # Validate if input are Suricata JSON logs? _ = json_line['timestamp'] input_type = 'suricata-json' - return input_type # Validate if input is CSV or TSV except json.JSONDecodeError: # Validate if input is text based @@ -302,8 +300,8 @@ def define_type(data): elif 'Date' in data: input_type = 'nfdump-tab' - return input_type # Returned guessed input log type + return input_type except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno From 3bd5c961ed4cf7f833dc61b8b86ce04d60143719 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:25:14 +0200 Subject: [PATCH 05/24] Raise exception if no type is found --- zeek-files-labeler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index c778e95..20b9553 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -299,6 +299,8 @@ def define_type(data): input_type = 'zeek-tab' elif 'Date' in data: input_type = 'nfdump-tab' + else: + raise Exception("Unknown input logs type") # Returned guessed input log type return input_type From 48e99c82231e82ede4f5e6af628a21374ea73b9c Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:35:57 +0200 Subject: [PATCH 06/24] Improve output_netflow_line_to_file docstrings --- zeek-files-labeler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 20b9553..9d99bc2 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -37,8 +37,8 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericlabel='', detailedlabel=''): """ - Get data and store it on a new file - If genericlabel is empty, it is a headler line to process + Store the input line with its labels into the output file. If 'genericlabel' is empty, it means + the input line is a header line, and requires special processing. """ try: if 'csv' in filetype: From 6cd3c10f89fd417b937651e62dcd9774cc64ae88 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:36:17 +0200 Subject: [PATCH 07/24] Improve output_netflow_line_to_file comments --- zeek-files-labeler.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 9d99bc2..47af656 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -41,15 +41,15 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla the input line is a header line, and requires special processing. """ try: + # Configure the field separator if 'csv' in filetype: separator = ',' elif 'tab' in filetype: separator = '\t' + # Validate if input line is a header line. Write all header lines back without change, + # except those Zeek headers that define fields and types. if type(originalline) == str and genericlabel == '': - # It is a headerline - - # Should we add the 'label' string? Zeek has many headerlines if '#fields' in originalline: outputline = originalline.strip() + separator + 'label' + separator + 'detailedlabel' + '\n' outputfile.writelines(outputline) @@ -58,14 +58,13 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla outputfile.writelines(outputline) else: outputfile.writelines(originalline) - # We are not putting the 'label' string in the header! + # Validate if input line is a netflow line and store along with the new fields elif type(originalline) == str and genericlabel != '': - # These are values to store outputline = originalline.strip() + separator + genericlabel + separator + detailedlabel + '\n' outputfile.writelines(outputline) if args.debug > 1: print(f' [+] Wrote line: {outputline}') - # keep it open! + # do not close the file except Exception as inst: print('Problem in output_labeled_netflow_file()') From e4985f1c2e28ed0da735391986e6c53ed20a0e90 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:37:38 +0200 Subject: [PATCH 08/24] Improve output_netflow_line_to_file docstrings --- zeek-files-labeler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 47af656..ba42e97 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -39,6 +39,8 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla """ Store the input line with its labels into the output file. If 'genericlabel' is empty, it means the input line is a header line, and requires special processing. + - Input: outpuffile, originalline, filetype, genericlabel and detailedlabel + - Output: no output """ try: # Configure the field separator From e20035817746eeb57151f8a098d71a18a22d4513 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:44:14 +0200 Subject: [PATCH 09/24] Improve define_columns docstrings --- zeek-files-labeler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index ba42e97..863052b 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -77,7 +77,11 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla def define_columns(headerline, filetype): - """ Define the columns for Argus and Zeek-tab from the line received """ + """ + Define the columns for Argus and Zeek-tab from the line received + - Input: headerline, filetype + - Output: column_idx + """ # These are the indexes for later fast processing column_idx = {} column_idx['starttime'] = False From fbeeae4fee1f7e2f18d1331acf25017c48a3b0c4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:44:30 +0200 Subject: [PATCH 10/24] Improve code readability --- zeek-files-labeler.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 863052b..4aa44eb 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -124,6 +124,7 @@ def define_columns(headerline, filetype): separator = ',' elif 'tab' in filetype: separator = '\t' + nline = headerline.strip().split(separator) try: # Remove the extra column of zeek if it is there @@ -131,11 +132,14 @@ def define_columns(headerline, filetype): except ValueError: # ignore if #fields is not there pass + if args.debug > 1: print(f'Headers line: {nline}') + for field in nline: if args.debug > 2: print(f'Field: {field.lower()}, index: {nline.index(field)}') + if 'time' in field.lower() or field.lower() == 'ts': column_idx['starttime'] = nline.index(field) elif field.lower() == 'uid': From 185715e0b7ff7644e951462872980931f93ce13b Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:51:58 +0200 Subject: [PATCH 11/24] Improve tool description --- zeek-files-labeler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 4aa44eb..9f2070d 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -17,12 +17,13 @@ # # # Authors: -# Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, sgarcia@exa.unicen.edu.ar, eldraco@gmail.com -# Veronica Valeros, vero.valeros@gmail.com -# Stratosphere Laboratory, Czech Technical University in Prague +# Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, eldraco@gmail.com +# Veronica Valeros, vero.valeros@gmail.com +# Stratosphere Laboratory, Czech Technical University in Prague # Description -# A tool to add labels in netflow files based on a configuration. Flow file include Zeek, Argus, and NFdump. Both in CSV and TSV +# A tool that effortlessly adds labels to netflow files. With support for Zeek, Argus, and NFdump +# formats in both CSV and TSV. import sys import json From 752aab47e897ab8467824bf0eaee7718161e79f4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:52:29 +0200 Subject: [PATCH 12/24] Fix C0325 unnecessary parens on while statement --- zeek-files-labeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 9f2070d..247c08b 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -339,7 +339,7 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): while '#' in line: line = input_file.readline() - while (line): + while line: # Count the first line amount_lines_processed += 1 From a9fbbc3878fe9379582a8b190d1c179b3e7450e4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 14:59:16 +0200 Subject: [PATCH 13/24] Use isinstance() rather than type() for a typecheck --- zeek-files-labeler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 247c08b..ba190a5 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -52,7 +52,7 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla # Validate if input line is a header line. Write all header lines back without change, # except those Zeek headers that define fields and types. - if type(originalline) == str and genericlabel == '': + if isinstance(originalline, str) and genericlabel == '': if '#fields' in originalline: outputline = originalline.strip() + separator + 'label' + separator + 'detailedlabel' + '\n' outputfile.writelines(outputline) @@ -62,7 +62,7 @@ def output_netflow_line_to_file(outputfile, originalline, filetype='', genericla else: outputfile.writelines(originalline) # Validate if input line is a netflow line and store along with the new fields - elif type(originalline) == str and genericlabel != '': + elif isinstance(originalline,str) and genericlabel != '': outputline = originalline.strip() + separator + genericlabel + separator + detailedlabel + '\n' outputfile.writelines(outputline) if args.debug > 1: @@ -246,7 +246,7 @@ def define_columns(headerline, filetype): # We need a temp dict because we can not change the size of dict while analyzing it temp_dict = {} for i in column_idx: - if type(column_idx[i]) == bool and column_idx[i] == False: + if isinstance(column_idx[i],bool) and column_idx[i] == False: continue temp_dict[i] = column_idx[i] column_idx = temp_dict @@ -288,7 +288,7 @@ def define_type(data): # Validate if input is CSV or TSV except json.JSONDecodeError: # Validate if input is text based - if type(data) == str: + if isinstance(data,str): nr_commas = len(data.split(',')) nr_tabs = len(data.split(' ')) if nr_commas > nr_tabs: From f52da415e767bbb951c3498b1bcac124548bc262 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:09:08 +0200 Subject: [PATCH 14/24] Better handling of undefined logs type --- zeek-files-labeler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index ba190a5..4b3f1ea 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -310,11 +310,11 @@ def define_type(data): elif 'Date' in data: input_type = 'nfdump-tab' else: - raise Exception("Unknown input logs type") + print("Exception in define_type(): unknown logs type.") + sys.exit(1) # Returned guessed input log type return input_type - except Exception as inst: exception_line = sys.exc_info()[2].tb_lineno print(f'\tProblem in define_type() line {exception_line}', 0, 1) From 78ef28066aa311107c4312cccbb974eaed2c551f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:19:37 +0200 Subject: [PATCH 15/24] Fix W1514 unspecified-encoding --- zeek-files-labeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 4b3f1ea..a4cce06 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -442,7 +442,7 @@ def cache_labeled_file(): # Open labeled flows file and get the columns try: - input_labeled_file = open(args.labeledfile,'r') + input_labeled_file = open(args.labeledfile,'r', encoding='utf-8') except Exception as inst: print('Some problem opening the input labeled netflow file. In cache_labeled_file()') print(type(inst)) # the exception instance @@ -559,7 +559,7 @@ def process_zeekfolder(): print(f'[+] Processing zeek file: {zeekfile_name}') try: - zeekfile = open(join(args.zeekfolder, zeekfile_name),'r') + zeekfile = open(join(args.zeekfolder, zeekfile_name),'r', encoding='utf-8') except Exception as inst: print(f'Some problem opening a zeek file {zeekfile_name}. In process_zeekfolder()') print(type(inst)) # the exception instance @@ -581,7 +581,7 @@ def process_zeekfolder(): print(f'[+] Type of flow file to label: {filetype}') # Create the output file for all cases - output_file = open(join(args.zeekfolder, zeekfile_name+'.labeled'),'w') + output_file = open(join(args.zeekfolder, zeekfile_name+'.labeled'),'w', encoding='utf-8') if args.debug > 1: print(f"[+] Output file created: {join(args.zeekfolder, zeekfile_name+'.labeled')}") From 1de3d73ccf6a3b67f83971471fbb3c0763898fff Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:19:55 +0200 Subject: [PATCH 16/24] Use f' on prints no .format() --- zeek-files-labeler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index a4cce06..73db0d0 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -532,7 +532,7 @@ def process_zeekfolder(): labels_dict = cache_labeled_file() if args.verbose > 0: - print('\n[+] Processing the zeek folder {0} for files to label'.format(args.zeekfolder)) + print(f"\n[+] Processing the zeek folder {args.zeekfolder} for files to label") # ----- Second, open each file in the folder, and label them. @@ -775,8 +775,8 @@ def process_zeekfolder(): if __name__ == '__main__': - print('Zeek Files labeler from labeled conn.log.labeled file. Version {}'.format(VERSION)) - print('https://stratosphereips.org') + print(f"Zeek Files labeler from labeled conn.log.labeled file. Version {VERSION}") + print("https://stratosphereips.org") # Parse the parameters parser = argparse.ArgumentParser(description="Given a conn.log.labeled file, copy those labels to the rest of the Zeek log files", add_help=False) From a73f59a8a03e8cd830dc41d60c72fabcf47f136f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:22:37 +0200 Subject: [PATCH 17/24] Remove unused variable --- zeek-files-labeler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 73db0d0..907b580 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -467,7 +467,6 @@ def cache_labeled_file(): # Define the columns if filetype == 'zeek-json': input_labeled_file_column_idx = define_columns(headerline, filetype='json') - amount_lines_processed = 0 elif filetype == 'zeek-tab': # Get all the other headers first while '#types' not in headerline: From ffdea4d5de5543fdca382114b45fa46fb813fcf4 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:23:17 +0200 Subject: [PATCH 18/24] Raise exception if subprocess fails --- zeek-files-labeler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 907b580..333fc37 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -624,7 +624,7 @@ def process_zeekfolder(): #if args.verbose > 5: #print(f"[+] Greping {fingerprint} in file {join(args.zeekfolder, zeekfile_name)}") command = 'grep ' + fingerprint + ' ' + join(args.zeekfolder, 'ssl.log') - result = subprocess.run(command.split(), stdout=subprocess.PIPE) + result = subprocess.run(command.split(), stdout=subprocess.PIPE, check=True) result = result.stdout.decode('utf-8') #if args.verbose > 5: #print(f"\t[+] Result {result}") @@ -672,7 +672,7 @@ def process_zeekfolder(): #if args.verbose > 5: #print(f"[+] Greping {file_id} in file {join(args.zeekfolder, zeekfile_name)}") command = 'grep ' + file_id + ' ' + join(args.zeekfolder, 'files.log') - result = subprocess.run(command.split(), stdout=subprocess.PIPE) + result = subprocess.run(command.split(), stdout=subprocess.PIPE, check=True) result = result.stdout.decode('utf-8') #if args.verbose > 5: #print(f"\t[+] Result {result}") From d93f0e02b020d9e074f1091bc70fc2fb053605ec Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:28:01 +0200 Subject: [PATCH 19/24] Remove unnecessary pass --- zeek-files-labeler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 333fc37..d238308 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -419,7 +419,6 @@ def process_zeek(column_idx, input_file, output_file, labelmachine, filetype): elif 'json' in filetype: # Count the first line amount_lines_processed += 1 - pass return amount_lines_processed except Exception as inst: From cf0b57f81aacb51a7dffa0b1f65cae3dffef091f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:28:10 +0200 Subject: [PATCH 20/24] Remove unused variable --- zeek-files-labeler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index d238308..3bfb1fc 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -589,7 +589,6 @@ def process_zeekfolder(): # ---- Define the columns of this file if filetype == 'zeek-json': column_idx = define_columns(headerline, filetype='json') - amount_lines_processed = 0 elif filetype == 'zeek-tab': # ---- Get all the headers lines and store them in the output file while '#types' not in headerline: From b8aba1e5f0405d9fdec5550f9b9b0d4f0fd4364a Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:28:57 +0200 Subject: [PATCH 21/24] Merged comparisons with 'in' --- zeek-files-labeler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 3bfb1fc..7e9278d 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -653,7 +653,7 @@ def process_zeekfolder(): # Because we create them sometimes from larger zeek files that were filtered pass line_to_label = zeekfile.readline().strip() - if zeekfile_name == 'ocsp.log' or zeekfile_name == 'pe.log': + if zeekfile_name in ('ocsp.log', 'pe.log'): line_to_label = zeekfile.readline().strip() while line_to_label and not '#' in line_to_label[0]: # Transform the line into an array From e359277b60bc83525d64ac12e30aa03be835a800 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:37:59 +0200 Subject: [PATCH 22/24] Better parsing of columns --- zeek-files-labeler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 7e9278d..1cb6d7d 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -245,10 +245,11 @@ def define_columns(headerline, filetype): # If not we will believe that we have data on them # We need a temp dict because we can not change the size of dict while analyzing it temp_dict = {} - for i in column_idx: - if isinstance(column_idx[i],bool) and column_idx[i] == False: + for key, value in column_idx.items(): + if isinstance(value,bool) and value is False: continue - temp_dict[i] = column_idx[i] + temp_dict[key] = value + column_idx = temp_dict return column_idx From a96d0109199a62badc2a2d95705994ef98132d67 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:39:44 +0200 Subject: [PATCH 23/24] add main docstring --- zeek-files-labeler.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 1cb6d7d..0507ef8 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -16,14 +16,18 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # -# Authors: -# Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, eldraco@gmail.com -# Veronica Valeros, vero.valeros@gmail.com -# Stratosphere Laboratory, Czech Technical University in Prague - -# Description -# A tool that effortlessly adds labels to netflow files. With support for Zeek, Argus, and NFdump -# formats in both CSV and TSV. +""" +Zeek files labeler + +A tool that effortlessly adds labels to netflow files. With support for Zeek, Argus, and NFdump +formats in both CSV and TSV. + +Authors: + Sebastian Garcia, sebastian.garcia@agents.fel.cvut.cz, eldraco@gmail.com + Veronica Valeros, vero.valeros@gmail.com + Stratosphere Laboratory, Czech Technical University in Prague +""" + import sys import json From 028b2c5edb7042baabbf55aa5ff97203535e083d Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 21 May 2023 15:47:40 +0200 Subject: [PATCH 24/24] Fix E713 Test for membership should be `not in` --- zeek-files-labeler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zeek-files-labeler.py b/zeek-files-labeler.py index 0507ef8..23ec37d 100755 --- a/zeek-files-labeler.py +++ b/zeek-files-labeler.py @@ -488,7 +488,7 @@ def cache_labeled_file(): inputline = input_labeled_file.readline() lines_with_labels_read = 0 - while inputline and not '#' in inputline: + while inputline and '#' not in inputline: # Transform the line into an array line_values = inputline.split(input_labeled_file_separator) if args.debug > 8: @@ -612,7 +612,7 @@ def process_zeekfolder(): if zeekfile_name == 'x509.log': line_to_label = zeekfile.readline().strip() - while line_to_label and not '#' in line_to_label[0]: + while line_to_label and '#' not in line_to_label[0]: # Transform the line into an array line_values = line_to_label.split(zeek_file_file_separator) if args.debug > 5: @@ -660,7 +660,7 @@ def process_zeekfolder(): line_to_label = zeekfile.readline().strip() if zeekfile_name in ('ocsp.log', 'pe.log'): line_to_label = zeekfile.readline().strip() - while line_to_label and not '#' in line_to_label[0]: + while line_to_label and '#' not in line_to_label[0]: # Transform the line into an array line_values = line_to_label.split(zeek_file_file_separator) if args.debug > 5: @@ -713,7 +713,7 @@ def process_zeekfolder(): # Read each line of the labeled file and get the zeek uid line_to_label = zeekfile.readline().strip() - while line_to_label and not '#' in line_to_label[0]: + while line_to_label and '#' not in line_to_label[0]: # Transform the line into an array line_values = line_to_label.split(zeek_file_file_separator) if args.debug > 5: