From a7cfbc18621232dc8f7cfe61cc3133384c3fb6e0 Mon Sep 17 00:00:00 2001 From: Nuno Francisco Moreira Date: Mon, 20 Feb 2023 23:03:24 +0100 Subject: [PATCH 1/4] [FEAT] Output path as an argument Allows user to set a different output path besides loot. If none is specified loot will be used. --- jir_thief.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/jir_thief.py b/jir_thief.py index 1a48faa..165fae4 100644 --- a/jir_thief.py +++ b/jir_thief.py @@ -1,4 +1,4 @@ -import requests, json, sys, getopt, time +import requests, json, sys, getopt, time, os # Set that holds all of the issues found in the keyword search issueSet = set() @@ -33,7 +33,7 @@ def getNumberOfPages(query, username, access_token, cURL): totalSize = int(jsonResp["total"]) return totalSize -def downloadContent(username, access_token, cURL): +def downloadContent(username, access_token, cURL, output_dir): # https://yourorg.atlassian.net/si/jira.issueviews:issue-word/KEY-123/KEY-123.doc headers = form_token_headers print('[*] Downloading files') @@ -48,7 +48,7 @@ def downloadContent(username, access_token, cURL): headers=headers ) - path = "loot/{KEY}.doc".format(KEY=issueKey) + path = "{OUTDIR}/{KEY}.doc".format(OUTDIR=output_dir, KEY=issueKey) with open(path, 'wb') as f: f.write(response.content) print('[*] Downloaded {count} of {set_length} files: {KEY}.doc'.format(count=count, set_length=set_length, KEY=issueKey)) @@ -116,9 +116,10 @@ def main(): username = "" access_token = "" user_agent = "" + output_dir = "" # usage - usage = '\nusage: python3 jir_thief.py [-h] -j -u -p -d [-a] ""' + usage = '\nusage: python3 jir_thief.py [-h] -j -u -p -d [-a] "" [-o] ' #help help = '\nThis Module will connect to Jira\'s API using an access token, ' @@ -140,11 +141,12 @@ def main(): help += '\n\t\tThe User-Agent string you wish to send in the http request.' help += '\n\t\tYou can use the latest chrome for MacOS for example: -a "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"' help += '\n\t\tDefault is "python-requests/2.25.1"' + help += '\n\n\t-o, --output-dir\n\t\t Output path to use instead of loot\n' help += '\n\n\t-h, --help\n\t\tshow this help message and exit\n' # try parsing options and arguments try : - opts, args = getopt.getopt(sys.argv[1:], "hj:u:p:d:a:", ["help", "url=", "user=", "accesstoken=", "dict=", "user-agent="]) + opts, args = getopt.getopt(sys.argv[1:], "hj:u:p:d:a:o:", ["help", "url=", "user=", "accesstoken=", "dict=", "user-agent=", "output-dir="]) except getopt.GetoptError as err: print(str(err)) print(usage) @@ -163,6 +165,10 @@ def main(): dict_path = arg if opt in ("-a", "--user-agent"): user_agent = arg + if opt in ("-o", "--output-dir"): + output_dir = arg + if not os.path.isdir(output_dir): + os.mkdir(output_dir) # check for mandatory arguments if not username: @@ -193,8 +199,12 @@ def main(): default_headers['User-Agent'] = user_agent form_token_headers['User-Agent'] = user_agent + # Set default loot path + if not output_dir: + output_dir = 'loot' + searchKeyWords(dict_path, username, access_token, cURL) - downloadContent(username, access_token, cURL) + downloadContent(username, access_token, cURL, output_dir) if __name__ == "__main__": From 6231b67e15e8d4ec13cb69eb777fef6ff97d2bc0 Mon Sep 17 00:00:00 2001 From: Nuno Francisco Moreira Date: Tue, 21 Feb 2023 01:30:46 +0100 Subject: [PATCH 2/4] Create wrapper.py --- wrapper.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 wrapper.py diff --git a/wrapper.py b/wrapper.py new file mode 100644 index 0000000..30b15a8 --- /dev/null +++ b/wrapper.py @@ -0,0 +1,107 @@ +''' +Wrapper to run jir_thief.py using multiple threads +Splits huge dictionary into one word files +outputs to path with matched keyword on the folder name for ease of analysis +''' +import os +import shutil +from datetime import datetime +import subprocess +from multiprocessing import Pool +from argparse import ArgumentParser + + +def run_stuff(keywords_lst): + ''' Define Worker pool with keyword list ''' + worker_pool = Pool(4) + worker_pool.map(run_scrapper, keywords_lst) + + +def run_scrapper(keyword_to_test): + ''' Run jir_thief with the variables defined in argparser + and the keyword file generated ''' + + print(f'Testing {keyword_to_test}.') + dict_file = f'dictionaries/{keyword_to_test}.txt' + loot_path = args.loot_dir + '/' + keyword_to_test + cmd = f'python3 jir_thief.py -j {args.cURL} -u {args.username} \ + -p {args.access_token} -d {dict_file} -o {loot_path}' + process = subprocess.Popen(cmd, shell=True, stdout=subprocess.DEVNULL) + process.wait() + print(f'Tested {keyword_to_test}.') + + +if __name__ == '__main__': + + parser = ArgumentParser() + optional = parser._action_groups.pop() + required = parser.add_argument_group('required arguments') + + required.add_argument('-d', '--dict', dest='dict_path', + help='path to dictionary', required=True) + required.add_argument('-j', '--url', + dest='cURL', + help='target URL', required=True) + required.add_argument('-u', '--user', + dest='username', + help='Account username', required=True) + required.add_argument('-p', '--accesstoken', + dest='access_token', + help='Account access token', required=True) + optional.add_argument( + '-o', + '--output-dir', + dest='loot_dir', + default='loot', + help='loot output directory') + parser._action_groups.append(optional) + args = parser.parse_args() + + loot_dir = args.loot_dir + dictionary_file = args.dict_path + + with open(dictionary_file, 'r', encoding='utf-8') as whole_dict_file: + for line in whole_dict_file: + + search_term = line.strip() + # path for the loot of specific keyword + keyword_loot_dir = loot_dir + '/' + search_term + + # create temp keyword dictionaries + keyword_dict_file = 'dictionaries/' + search_term + '.txt' + with open(keyword_dict_file, 'w', encoding='utf-8') as temp_keyword_file: + temp_keyword_file.write(line) + + # check if destination keyword loot dir exist + if os.path.isdir(keyword_loot_dir): + # check if destination keyword loot dir is empty + if not os.listdir(keyword_loot_dir): + print(f'Directory {keyword_loot_dir} is empty') + + # if destination keyword loot dir is not empty, move it to + # backup and empty it + else: + print(f'Directory {keyword_loot_dir} is not empty') + print(f'Backing up {keyword_loot_dir} and deleting it') + running_date = datetime.utcnow().strftime('%Y-%m-%d-%H-%M') # catch date + + # backup path is keyword_loot_dir + _date + keyword_loot_dir_backup = keyword_loot_dir + '_' + running_date + # move loot to keyword_loot_dir_date/loot + shutil.move(keyword_loot_dir, keyword_loot_dir_backup) + + print(f'{keyword_loot_dir_backup} created') + print(f'{keyword_loot_dir} deleted') + # recreate loot + os.mkdir(keyword_loot_dir) + print(f'{keyword_loot_dir} recreated') + + else: + print(f'{keyword_loot_dir} doesn\'t exist. Creating it.') + os.mkdir(keyword_loot_dir) + print(f'{keyword_loot_dir} created.') + + with open(dictionary_file, 'r', encoding='utf-8') as whole_dict_file: + keywords_list = whole_dict_file.read().splitlines() + + run_stuff(keywords_list) From 2e00d9be106cdc19b712401ec0b9035e84f4d782 Mon Sep 17 00:00:00 2001 From: Nuno Francisco Moreira Date: Tue, 21 Feb 2023 01:39:40 +0100 Subject: [PATCH 3/4] Delete wrapper.py --- wrapper.py | 107 ----------------------------------------------------- 1 file changed, 107 deletions(-) delete mode 100644 wrapper.py diff --git a/wrapper.py b/wrapper.py deleted file mode 100644 index 30b15a8..0000000 --- a/wrapper.py +++ /dev/null @@ -1,107 +0,0 @@ -''' -Wrapper to run jir_thief.py using multiple threads -Splits huge dictionary into one word files -outputs to path with matched keyword on the folder name for ease of analysis -''' -import os -import shutil -from datetime import datetime -import subprocess -from multiprocessing import Pool -from argparse import ArgumentParser - - -def run_stuff(keywords_lst): - ''' Define Worker pool with keyword list ''' - worker_pool = Pool(4) - worker_pool.map(run_scrapper, keywords_lst) - - -def run_scrapper(keyword_to_test): - ''' Run jir_thief with the variables defined in argparser - and the keyword file generated ''' - - print(f'Testing {keyword_to_test}.') - dict_file = f'dictionaries/{keyword_to_test}.txt' - loot_path = args.loot_dir + '/' + keyword_to_test - cmd = f'python3 jir_thief.py -j {args.cURL} -u {args.username} \ - -p {args.access_token} -d {dict_file} -o {loot_path}' - process = subprocess.Popen(cmd, shell=True, stdout=subprocess.DEVNULL) - process.wait() - print(f'Tested {keyword_to_test}.') - - -if __name__ == '__main__': - - parser = ArgumentParser() - optional = parser._action_groups.pop() - required = parser.add_argument_group('required arguments') - - required.add_argument('-d', '--dict', dest='dict_path', - help='path to dictionary', required=True) - required.add_argument('-j', '--url', - dest='cURL', - help='target URL', required=True) - required.add_argument('-u', '--user', - dest='username', - help='Account username', required=True) - required.add_argument('-p', '--accesstoken', - dest='access_token', - help='Account access token', required=True) - optional.add_argument( - '-o', - '--output-dir', - dest='loot_dir', - default='loot', - help='loot output directory') - parser._action_groups.append(optional) - args = parser.parse_args() - - loot_dir = args.loot_dir - dictionary_file = args.dict_path - - with open(dictionary_file, 'r', encoding='utf-8') as whole_dict_file: - for line in whole_dict_file: - - search_term = line.strip() - # path for the loot of specific keyword - keyword_loot_dir = loot_dir + '/' + search_term - - # create temp keyword dictionaries - keyword_dict_file = 'dictionaries/' + search_term + '.txt' - with open(keyword_dict_file, 'w', encoding='utf-8') as temp_keyword_file: - temp_keyword_file.write(line) - - # check if destination keyword loot dir exist - if os.path.isdir(keyword_loot_dir): - # check if destination keyword loot dir is empty - if not os.listdir(keyword_loot_dir): - print(f'Directory {keyword_loot_dir} is empty') - - # if destination keyword loot dir is not empty, move it to - # backup and empty it - else: - print(f'Directory {keyword_loot_dir} is not empty') - print(f'Backing up {keyword_loot_dir} and deleting it') - running_date = datetime.utcnow().strftime('%Y-%m-%d-%H-%M') # catch date - - # backup path is keyword_loot_dir + _date - keyword_loot_dir_backup = keyword_loot_dir + '_' + running_date - # move loot to keyword_loot_dir_date/loot - shutil.move(keyword_loot_dir, keyword_loot_dir_backup) - - print(f'{keyword_loot_dir_backup} created') - print(f'{keyword_loot_dir} deleted') - # recreate loot - os.mkdir(keyword_loot_dir) - print(f'{keyword_loot_dir} recreated') - - else: - print(f'{keyword_loot_dir} doesn\'t exist. Creating it.') - os.mkdir(keyword_loot_dir) - print(f'{keyword_loot_dir} created.') - - with open(dictionary_file, 'r', encoding='utf-8') as whole_dict_file: - keywords_list = whole_dict_file.read().splitlines() - - run_stuff(keywords_list) From 1fb10ad169bb32a954af20fb3f21e3b6dfc2b121 Mon Sep 17 00:00:00 2001 From: Nuno Francisco Moreira Date: Tue, 21 Feb 2023 03:23:29 +0100 Subject: [PATCH 4/4] [FIX] Handle empty response from query handle with error without breaking the script. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) --- jir_thief.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/jir_thief.py b/jir_thief.py index 165fae4..c428cad 100644 --- a/jir_thief.py +++ b/jir_thief.py @@ -89,16 +89,17 @@ def searchKeyWords(path, username, access_token, cURL): headers=default_headers, params=searchQuery ) - - jsonResp = json.loads(response.text) - if jsonResp['total']: - issues = jsonResp['issues'] - for issue in issues: - issueKey = issue['key'] - issueSet.add(issueKey) - - - start_point += 100 + try: + jsonResp = json.loads(response.text) + if jsonResp['total']: + issues = jsonResp['issues'] + for issue in issues: + issueKey = issue['key'] + issueSet.add(issueKey) + start_point += 100 + except: + print("[*] Error gathering issues.") + start_point += 100 if len(issueSet) > tempSetCount: count = len(issueSet) - tempSetCount