diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index 87179d37..7e2d81c1 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -32,6 +32,11 @@ jobs: with: src: "dns_entry_checker" + - name: Word Cloud Generator + uses: psf/black@stable + with: + src: "word_cloud_generator" + - name: JSM Metric Collection uses: psf/black@stable with: diff --git a/.github/workflows/word_cloud_generator.yaml b/.github/workflows/word_cloud_generator.yaml new file mode 100644 index 00000000..64120189 --- /dev/null +++ b/.github/workflows/word_cloud_generator.yaml @@ -0,0 +1,33 @@ +name: Word Cloud Generator Unittest + +on: + push: + branches: + - master + pull_request: + paths: + - "word_cloud_generator/**" + - ".github/workflows/word_cloud_generator.yaml" + +jobs: + test_with_unit_test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r word_cloud_generator/requirements.txt + - name: Test with unittest + run: | + cd word_cloud_generator + python3 -m unittest test_word_cloud_generator.py \ No newline at end of file diff --git a/word_cloud_generator/__init__.py b/word_cloud_generator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/word_cloud_generator/readme.md b/word_cloud_generator/readme.md new file mode 100644 index 00000000..98252e9b --- /dev/null +++ b/word_cloud_generator/readme.md @@ -0,0 +1,29 @@ +# Word Cloud Generator + +## General Info + +This is a Python script that, when run, creates a filtered word cloud from the summaries of tickets over a time period. 
+ +The script takes ~10 seconds to complete on a month of tickets. + +Unit tests exist which test the logic of the methods the script uses, and the tests should be run whenever changes are made to the code. + +## Requirements + +requests: 2.31.0 +parameterized: 0.9.0 +python-dateutil: 2.8.2 +wordcloud: 1.9.2 + +## Setup +Running the script: +``` +$ cd ../word_cloud_generator +$ pip install -r requirements.txt +$ python3 word_cloud_generator.py +``` + +Running the unit tests: +``` +$ python3 -m unittest discover -s . -p "test_*.py" +``` \ No newline at end of file diff --git a/word_cloud_generator/requirements.txt b/word_cloud_generator/requirements.txt new file mode 100644 index 00000000..693c8a1b --- /dev/null +++ b/word_cloud_generator/requirements.txt @@ -0,0 +1,5 @@ +requests +parameterized +python-dateutil +wordcloud +mashumaro \ No newline at end of file diff --git a/word_cloud_generator/test_word_cloud_generator.py b/word_cloud_generator/test_word_cloud_generator.py new file mode 100644 index 00000000..130d5892 --- /dev/null +++ b/word_cloud_generator/test_word_cloud_generator.py @@ -0,0 +1,283 @@ +from unittest import mock +from unittest.mock import MagicMock, patch +from parameterized import parameterized +from datetime import datetime + +import requests +import requests.auth +import word_cloud_generator +import unittest + + +class ChangingJson: + """ + Class to represent a json object which changes value when it's called. 
+ """ + + def __init__(self, values): + """ + Constructs the attributes for the ChangingJson object + :param values: The values for the ChangingJson to change through (list) + """ + self.values = values + self.current_index = 0 + + def get(self, get_value): + """ + Function to emulate the Json "Get" function while cycling through the values + :param get_value: The value to requested (any) + :return: The next value currently stored in the list (any) + """ + return_value = self.values[self.current_index].get(get_value) + if get_value == "size": + self.current_index = (self.current_index + 1) % len(self.values) + return return_value + + +auth = requests.auth.HTTPBasicAuth("test_username", "test_password") +headers = { + "Accept": "application/json", +} +host = "https://test.com" + + +class WorldCloudGeneratorTests(unittest.TestCase): + """ + Class for the test to be run against the functions from word_cloud_generator.py + """ + + @parameterized.expand( + [ + ("check found", "something-else", True), + ("check not found", b'{"status":"RUNNING"}', False), + ] + ) + def test_get_response_json(self, __, session_response_return_value, expected_out): + """ + Function to test the functionality of get_response_json by asserting that the function + calls a specific function or raises a Timeout error + :param __: The name of the parameter, which is thrown away (string) + :param session_response_return_value: The mocked return value for the + session response (string) + :param expected_out: The expected output of the function (bool) + """ + with mock.patch("word_cloud_generator.requests") and patch( + "word_cloud_generator.json" + ): + word_cloud_generator.requests.session = MagicMock() + word_cloud_generator.requests.session.return_value.get.return_value.content = ( + session_response_return_value + ) + + word_cloud_generator.json = MagicMock() + + if expected_out: + word_cloud_generator.get_response_json(auth, headers, host) + + 
word_cloud_generator.json.loads.assert_called_once() + else: + self.assertRaises( + requests.exceptions.Timeout, + word_cloud_generator.get_response_json, + auth, + headers, + host, + ) + + @parameterized.expand( + [ + ("dates valid", "2022-01-01", ["test1", "test2", "test3", "test4"]), + ("dates invalid", "2024-01-01", []), + ] + ) + def test_get_issues_contents_after_time(self, __, filter_date, expected_out): + """ + Function to test the functionality of get_issues_contents_after_time by asserting + that the value returned is expected + :param __: The name of the parameter, which is thrown away (string) + :param filter_date: The mocked date to filter after (list) + :param expected_out: The expected output of the function (bool) + """ + with mock.patch("word_cloud_generator.get_response_json"), mock.patch( + "word_cloud_generator.filter_issue" + ): + issue_filter = word_cloud_generator.from_user_inputs( + **{ + "output": None, + "start_date": None, + "end_date": filter_date, + "word_cloud": None, + } + ) + values = ChangingJson( + ( + { + "values": ( + { + "fields": { + "summary": "test1", + "created": "2023-01-01T00:00:00", + } + }, + { + "fields": { + "summary": "test2", + "created": "2023-01-01T00:00:00", + } + }, + ), + "size": 50, + }, + { + "values": ( + { + "fields": { + "summary": "test3", + "created": "2023-01-01T00:00:00", + } + }, + { + "fields": { + "summary": "test4", + "created": "2023-01-01T00:00:00", + } + }, + ), + "size": 32, + }, + ) + ) + word_cloud_generator.get_response_json.return_value = values + word_cloud_generator.filter_issue.return_value = True + self.assertEqual( + word_cloud_generator.get_issues_contents_after_time( + auth, + headers, + host, + issue_filter, + ), + expected_out, + ) + + @parameterized.expand( + [ + ( + "dates valid", + { + "output": None, + "end_date": None, + "word_cloud": None, + "start_date": "2024-01-01", + "assigned": "test", + }, + True, + ), + ( + "dates invalid", + { + "output": None, + "end_date": None, + 
"word_cloud": None, + "start_date": "2022-01-01", + "assigned": "test", + }, + False, + ), + ( + "assigned valid", + { + "output": None, + "end_date": None, + "word_cloud": None, + "start_date": "2024-01-01", + "assigned": "test", + }, + True, + ), + ( + "assigned invalid", + { + "output": None, + "end_date": None, + "word_cloud": None, + "start_date": "2024-01-01", + "assigned": "test failed", + }, + False, + ), + ] + ) + def test_filter_issue(self, __, issue_filter, expected_out): + """ + Function to test the functionality of filter_issue by asserting + that the value returned is expected + :param __: The name of the parameter, which is thrown away (string) + :param issue_filter: The issue filter (dict) + :param expected_out: The expected output of the function (bool) + """ + issue = {"fields": {"assignee": {"displayName": "test"}}} + issue_date = datetime.strptime("2023-01-01", "%Y-%m-%d") + issue_filter = word_cloud_generator.from_user_inputs(**issue_filter) + self.assertEqual( + word_cloud_generator.filter_issue( + issue, + issue_filter, + issue_date, + ), + expected_out, + ) + + def test_generate_word_cloud(self): + """ + Function to test the functionality of generate_word_cloud by asserting that the function + is called with specific inputs + """ + with mock.patch("word_cloud_generator.filter_word_cloud"), mock.patch( + "word_cloud_generator.WordCloud" + ): + issues_contents = "test data" + issue_filter = "" + word_cloud_output_location = "test" + word_cloud_parameters = { + "width": 2000, + "height": 1000, + "min_font_size": 25, + "max_words": 10000, + } + word_cloud_generator.generate_word_cloud( + issues_contents, + issue_filter, + word_cloud_output_location, + **word_cloud_parameters, + ) + word_cloud_generator.WordCloud.return_value.generate.assert_called_with( + word_cloud_generator.filter_word_cloud.return_value + ) + word_cloud_generator.WordCloud.return_value.to_file.assert_called_with( + "test" + ) + + def test_filter_word_cloud(self): + """ + 
Function to test the functionality of generate_word_cloud by asserting that the function + returns an expected value + """ + issue_filter = word_cloud_generator.from_user_inputs( + **{ + "output": None, + "start_date": None, + "end_date": None, + "word_cloud": None, + "filter_not": "delete|this", + "filter_for": "data|test|this|here|delete|not", + } + ) + issues_contents = "test data delete this not here" + self.assertEqual( + word_cloud_generator.filter_word_cloud(issue_filter, issues_contents), + "test data not here", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/word_cloud_generator/word_cloud_generator.py b/word_cloud_generator/word_cloud_generator.py new file mode 100644 index 00000000..5d8ce491 --- /dev/null +++ b/word_cloud_generator/word_cloud_generator.py @@ -0,0 +1,312 @@ +from argparse import ArgumentParser, RawDescriptionHelpFormatter +from datetime import datetime +from dateutil.relativedelta import relativedelta +from pathlib import Path +from sys import argv +from time import sleep +from os import path +from wordcloud import WordCloud +from typing import Optional, Dict, List +from dataclasses import dataclass +from mashumaro import DataClassDictMixin +import re +import json +import requests + + +@dataclass +class IssuesFilter(DataClassDictMixin): + output: str + start_date: str + end_date: str + word_cloud: str + assigned: Optional[str] = None + filter_for: Optional[str] = None + filter_not: Optional[str] = None + + +def from_user_inputs(**kwargs): + """ + Take the inputs from an argparse and populate a IssuesFilter dataclass and return it + :param kwargs: a dictionary of argparse values + """ + + return IssuesFilter(**kwargs) + + +def parse_args(inp_args: Dict) -> Dict: + """ + Function to parse commandline args + :param inp_args: a set of commandline args to parse + :returns: A dictionary of parsed args + """ + # Get arguments passed to the script + parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter) + + 
parser.add_argument( + "-u", + "--username", + metavar="USERNAME", + help="FedID of the user", + required=True, + ) + parser.add_argument( + "-p", + "--password", + metavar="PASSWORD", + help="Password of the user", + required=True, + ) + parser.add_argument( + "-o", + "--output", + metavar="OUTPUT", + help="Directory to create the output files in", + default="output", + ) + default_value_start_date = datetime.now().strftime("%Y-%m-%d") + parser.add_argument( + "-s", + "--start_date", + metavar="START_DATE", + help="Date to get issues from", + default=default_value_start_date, + ) + default_value_end_date = (datetime.now() - relativedelta(months=1)).strftime( + "%Y-%m-%d" + ) + parser.add_argument( + "-e", + "--end_date", + metavar="END_DATE", + help="Date to get issues to", + default=default_value_end_date, + ) + parser.add_argument( + "-a", + "--assigned", + metavar="ASSIGNED", + help="Assigned user to get tickets from", + ) + parser.add_argument( + "-f", + "--filter_for", + metavar="FILTER_FOR", + help="Strings to filter the word cloud for", + ) + parser.add_argument( + "-n", + "--filter_not", + metavar="FILTER_NOT", + help="Strings to filter the word cloud to not have", + ) + parser.add_argument( + "-w", + "--word_cloud", + metavar="WORD_CLOUD", + help="Parameters to create the word cloud with", + default="2000, 1000, 25, 10000", + ) + args = parser.parse_args(inp_args) + return args + + +def get_response_json(auth, headers: Dict, url: str) -> Dict: + """ + Function to send a get request to a url and return the response as json + :param auth: A HTTPBasicAuth object for authentication (HTTPBasicAuth) + :param headers: A request Header + :param url: The URL to send the request + :returns: A dictionary of JSON values + """ + session = requests.session() + session.headers = headers + session.auth = auth + + attempts = 5 + response = None + + while attempts > 0: + response = session.get(url, timeout=5) + if ( + response.content != b'{"status":"RUNNING"}' + and 
response.content != b'{"status":"ENQUEUED"}' + ): + break + else: + sleep(1) + attempts = attempts - 1 + + if attempts == 0: + raise requests.exceptions.Timeout( + "Get request status not completed before timeout" + ) + + return json.loads(response.text) + + +def get_issues_contents_after_time( + auth, headers: Dict, host: str, issue_filter: Dict +) -> List: + """ + Function to get the contents of through issues using a loop, as only 50 can be checked at a time + :param issue_filter: Dict of filters to check the issues against + :param auth: A HTTPBasicAuth object for authentication (HTTPBasicAuth) + :param headers: A request Header + :param host: The host used to create the URL to send the request + :returns: A list with the contents of all valid issues + """ + curr_marker = 0 + check_limit = 50 + issues_contents = [] + while True: + url = f"{host}/rest/servicedeskapi/servicedesk/6/queue/182/issue?start={curr_marker}" + json_load = get_response_json(auth, headers, url) + issues = json_load.get("values") + issues_length = json_load.get("size") + for i, issue in enumerate(issues, 1): + issue_date = datetime.strptime( + issue.get("fields").get("created")[:10], "%Y-%m-%d" + ) + if issue_date < datetime.strptime(issue_filter.end_date, "%Y-%m-%d"): + return issues_contents + if filter_issue(issue, issue_filter, issue_date): + issue_contents = issue.get("fields").get("summary") + if issue_contents: + issues_contents.append(issue_contents) + + # break out of the loop if we reach the end of the issue list + if issues_length < check_limit: + break + curr_marker += issues_length + return issues_contents + + +def filter_issue(issue: Dict, issue_filter: Dict, issue_date: str) -> bool: + """ + Function to check if an issue passes the set filters + :param issue: A dict of an issues contents + :param issue_filter: Dict of filters to check the issues against + :param issue_date: The date that the issue was created + :returns: If the issue passes the filters + """ + fields = 
issue.get("fields", None) + if not fields: + return False + + assignee = fields.get("assignee", None) + if not assignee: + return False + + issue_assigned = assignee.get("displayName", None) + assign_check = issue_filter.assigned + if (not issue_assigned or issue_assigned != assign_check) and assign_check: + return False + + if issue_date > datetime.strptime(issue_filter.start_date, "%Y-%m-%d"): + return False + return True + + +def generate_word_cloud( + issues_contents: List, issue_filter: Dict, word_cloud_output_location, **kwargs +): + """ + Function to generate and save a word cloud + :param issues_contents: The summary of every valid issue + :param issue_filter: Dict of filters to check the issues against + :param word_cloud_output_location: The output location for the word cloud to be saved to + :param kwargs: A set of kwargs to pass to WordCloud + - width + - height + - min_font_size + - max_words + """ + matches = re.findall(r"((\w+([.'](?![ \n']))*[-_]*)+)", issues_contents) + # Regex to find all words and include words joined with certain characters, while not + # allowing certain characters to exist at the start or end of the word, such as dots. 
+ if matches: + issues_contents = " ".join(list(list(zip(*matches))[0])) + issues_contents = filter_word_cloud(issue_filter, issues_contents) + word_cloud = WordCloud( + width=kwargs["width"], + height=kwargs["height"], + min_font_size=kwargs["min_font_size"], + max_words=kwargs["max_words"], + background_color="white", + collocations=False, + regexp=r"\w*\S*", + ) + + word_cloud.generate(issues_contents) + + word_cloud.to_file(word_cloud_output_location) + + +def filter_word_cloud(issue_filter: Dict, issues_contents: List): + """ + Function to filter the contents of the word cloud to or against certain strings + :param issues_contents: The summary of every valid issue + :param issue_filter: Dict of filters to check the issues against + :returns: The filtered issues contents + """ + if issue_filter.filter_not: + issues_contents = re.sub( + issue_filter.filter_not.lower(), "", issues_contents, flags=re.I + ) + if issue_filter.filter_for: + issues_contents = " ".join( + re.findall( + issue_filter.filter_for.lower(), + issues_contents, + flags=re.IGNORECASE, + ) + ) + + return issues_contents + + +def word_cloud_generator(): + """ + Function to take arguments, generate the output location and run the + functions to the data for the word cloud and generate it + """ + args = parse_args(argv[1:]) + host = "https://stfc.atlassian.net" + username = args.username + password = args.password + + issue_filter = from_user_inputs(vars(args)) + + parameters_list = issue_filter.word_cloud.split(", ") + word_cloud_parameters = { + "width": int(parameters_list[0]), + "height": int(parameters_list[1]), + "min_font_size": int(parameters_list[2]), + "max_words": int(parameters_list[3]), + } + + Path(issue_filter.output).mkdir(exist_ok=True) + + word_cloud_output_location = path.join( + issue_filter.output, + f"word cloud - {datetime.now().strftime('%Y.%m.%d.%H.%M.%S')}.png", + ) + + auth = requests.auth.HTTPBasicAuth(username, password) + headers = { + "Accept": "application/json", + } 
+ + issues_contents = get_issues_contents_after_time(auth, headers, host, issue_filter) + + generate_word_cloud( + " ".join(issues_contents), + issue_filter, + word_cloud_output_location, + **word_cloud_parameters, + ) + + +if __name__ == "__main__": + word_cloud_generator()