Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add monitoring script to collect information about VMs in different states #128

Merged
merged 10 commits into from
Feb 20, 2024
137 changes: 137 additions & 0 deletions MonitoringTools/tests/test_collect_vm_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
from unittest.mock import NonCallableMock, Mock, patch
from collect_vm_stats import (
number_servers_active,
number_servers_build,
number_servers_error,
number_servers_shutoff,
number_servers_total,
get_all_server_statuses,
server_obj_to_len,
main,
)


def test_server_obj_to_len():
    """
    Tests that server_obj_to_len reports how many items an iterator yields
    """
    expected_count = 3
    mock_servers = iter([NonCallableMock() for _ in range(expected_count)])
    assert server_obj_to_len(mock_servers) == expected_count


def test_number_servers_total():
    """
    Tests that the total number of servers can be queried and counted, and
    that the query is issued without any status filter
    """
    mock_conn = Mock()
    mock_conn.compute.servers.return_value = iter(
        [NonCallableMock(), NonCallableMock(), NonCallableMock()]
    )

    num_returned = number_servers_total(mock_conn)

    assert num_returned == 3
    # three results is well below one page, so exactly one call is expected;
    # limit/marker are the pagination defaults applied by run_server_query
    mock_conn.compute.servers.assert_called_once_with(
        details=False,
        all_projects=True,
        limit=1000,
        marker=None,
    )


def test_number_servers_active():
    """
    Tests that the active servers can be queried and counted, and that the
    query is restricted to the ACTIVE status
    """
    mock_conn = Mock()
    mock_conn.compute.servers.return_value = iter(
        [NonCallableMock(), NonCallableMock(), NonCallableMock()]
    )

    num_returned = number_servers_active(mock_conn)

    assert num_returned == 3
    # three results is well below one page, so exactly one call is expected;
    # limit/marker are the pagination defaults applied by run_server_query
    mock_conn.compute.servers.assert_called_once_with(
        details=False,
        all_projects=True,
        status="ACTIVE",
        limit=1000,
        marker=None,
    )


def test_number_servers_build():
    """
    Tests that the servers in build state can be queried and counted, and
    that the query is restricted to the BUILD status
    """
    mock_conn = Mock()
    mock_conn.compute.servers.return_value = iter(
        [NonCallableMock(), NonCallableMock(), NonCallableMock()]
    )

    num_returned = number_servers_build(mock_conn)

    assert num_returned == 3
    # three results is well below one page, so exactly one call is expected;
    # limit/marker are the pagination defaults applied by run_server_query
    mock_conn.compute.servers.assert_called_once_with(
        details=False,
        all_projects=True,
        status="BUILD",
        limit=1000,
        marker=None,
    )


def test_number_servers_error():
    """
    Tests that the error servers can be queried and counted, and that the
    query is restricted to the ERROR status
    """
    mock_conn = Mock()
    mock_conn.compute.servers.return_value = iter(
        [NonCallableMock(), NonCallableMock(), NonCallableMock()]
    )

    num_returned = number_servers_error(mock_conn)

    assert num_returned == 3
    # three results is well below one page, so exactly one call is expected;
    # limit/marker are the pagination defaults applied by run_server_query
    mock_conn.compute.servers.assert_called_once_with(
        details=False,
        all_projects=True,
        status="ERROR",
        limit=1000,
        marker=None,
    )


def test_number_servers_shutoff():
    """
    Tests that the shutoff servers can be queried and counted, and that the
    query is restricted to the SHUTOFF status
    """
    mock_conn = Mock()
    mock_conn.compute.servers.return_value = iter(
        [NonCallableMock(), NonCallableMock(), NonCallableMock()]
    )

    num_returned = number_servers_shutoff(mock_conn)

    assert num_returned == 3
    # three results is well below one page, so exactly one call is expected;
    # limit/marker are the pagination defaults applied by run_server_query
    mock_conn.compute.servers.assert_called_once_with(
        details=False,
        all_projects=True,
        status="SHUTOFF",
        limit=1000,
        marker=None,
    )


@patch("collect_vm_stats.connect")
def test_get_all_server_statuses(mock_connect):
    """
    Tests that get_all_server_statuses queries each server state in turn and
    formats the counts into the data string to send to influx
    """

    def _mock_server_call(num_to_return):
        """stubs out server call
        :param num_to_return: number of mock objects to return
        """
        return iter([NonCallableMock() for _ in range(num_to_return)])

    # one stubbed result per query, consumed in the order the function
    # issues them: total, active, build, error, shutoff
    counts_in_call_order = (10, 4, 3, 2, 1)
    mock_connect.return_value.compute.servers.side_effect = [
        _mock_server_call(count) for count in counts_in_call_order
    ]

    res = get_all_server_statuses("prod")

    expected = (
        "VMStats,instance=Prod "
        "totalVM=10i,activeVM=4i,"
        "buildVM=3i,errorVM=2i,shutoffVM=1i"
    )
    assert res == expected


@patch("collect_vm_stats.run_scrape")
@patch("collect_vm_stats.parse_args")
def test_main(mock_parse_args, mock_run_scrape):
    """
    tests that main parses the user arguments and passes the result, together
    with get_all_server_statuses, to the run_scrape utility function
    """
    fake_cli_args = NonCallableMock()

    main(fake_cli_args)

    # arguments are parsed once, with the script description
    mock_parse_args.assert_called_once_with(
        fake_cli_args, description="Get All VM Statuses"
    )
    # the parsed arguments and the stats callback are handed to run_scrape
    mock_run_scrape.assert_called_once_with(
        mock_parse_args.return_value, get_all_server_statuses
    )
173 changes: 173 additions & 0 deletions MonitoringTools/usr/local/bin/collect_vm_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import sys
from typing import List, Dict, Optional

from openstack import connect
from send_metric_utils import run_scrape, parse_args


def server_obj_to_len(server_obj) -> int:
    """
    Count the items produced by an iterable such as a generator, which has
    no len() of its own. A one-shot iterator is consumed by this call.
    :param server_obj: Iterable of query results (e.g. OpenStack generator object)
    :return: Integer number of items the iterable yielded
    """
    # tally items one at a time rather than building an intermediate list
    return sum(1 for _ in server_obj)


def run_server_query(
    conn: connect,
    filters: Optional[Dict],
    page_size: int = 1000,
    call_limit: int = 1000,
) -> List:
    """
    Helper method for running server query using pagination - openstacksdk calls
    can only return a maximum number of values (set by limit), so to continue
    getting values we run another call, passing a "marker" value of the last
    item seen
    :param conn: OpenStack cloud connection
    :param filters: A dictionary of filters to run on the query (server-side).
        None or an empty dict means no filtering. Any "limit" or "marker" key
        supplied here is overridden by the pagination values.
    :param page_size: (Default 1000) how many items are returned by single call
    :param call_limit: (Default 1000) max number of paging iterations.
        - this is required to mitigate some bugs where successive paging loops
        back on itself leading to endless calls. At most this many calls are
        made; results collected so far are returned if the limit is reached.
    :return: A list of server objects
    """
    # pagination keys are merged last so they always win over caller filters
    query_filters = {**(filters or {}), "limit": page_size, "marker": None}
    query_res = []

    # range() caps the loop at exactly call_limit calls (the previous
    # "num_calls > call_limit" guard allowed one call too many)
    for _ in range(call_limit):
        marker_before_call = query_filters["marker"]

        servers = conn.compute.servers(
            details=False, all_projects=True, **query_filters
        )
        for i, server in enumerate(servers):
            query_res.append(server)

            # openstacksdk calls break after going over pagination limit
            if i == page_size - 1:
                # a full page was read: resume after its last server next time
                query_filters["marker"] = server["id"]
                break

        # if marker hasn't changed, the page was not full (or paging looped
        # back to the same item), so the query has terminated
        if query_filters["marker"] == marker_before_call:
            break

    return query_res


def number_servers_total(conn: connect) -> int:
    """
    Count every instance on an OpenStack Cloud, across all projects and
    regardless of state.
    :param conn: OpenStack cloud connection
    :returns: Number of VMs in total across the cloud
    """
    # no filters are passed, so the query is not narrowed by status
    return server_obj_to_len(run_server_query(conn, None))


def number_servers_active(conn: connect) -> int:
    """
    Count the instances on an OpenStack Cloud that are in ACTIVE state.
    :param conn: OpenStack Cloud Connection
    :returns: Number of active VMs
    """
    status_filter = {"status": "ACTIVE"}
    # the status filter is applied server-side by the query
    return server_obj_to_len(run_server_query(conn, status_filter))


def number_servers_build(conn: connect) -> int:
    """
    Count the instances on an OpenStack Cloud that are in BUILD state.
    :param conn: OpenStack Cloud Connection
    :returns: Number of VMs in BUILD state
    """
    status_filter = {"status": "BUILD"}
    # the status filter is applied server-side by the query
    return server_obj_to_len(run_server_query(conn, status_filter))


def number_servers_error(conn: connect) -> int:
    """
    Count the instances on an OpenStack Cloud that are in ERROR state.
    :param conn: OpenStack Cloud Connection
    :returns: Number of VMs in ERROR state
    """
    status_filter = {"status": "ERROR"}
    # the status filter is applied server-side by the query
    return server_obj_to_len(run_server_query(conn, status_filter))


def number_servers_shutoff(conn: connect) -> int:
    """
    Count the instances on an OpenStack Cloud that are in SHUTOFF state.
    :param conn: OpenStack Cloud Connection
    :returns: Number of VMs in SHUTOFF (STOPPED) state
    """
    status_filter = {"status": "SHUTOFF"}
    # the status filter is applied server-side by the query
    return server_obj_to_len(run_server_query(conn, status_filter))


def get_all_server_statuses(cloud_name: str) -> str:
    """
    Collects the VM counts for a cloud and formats them as a single string
    :param cloud_name: Name of OpenStack cloud to connect to
    :return: A string of the form
        "VMStats,instance=<Cloud name, capitalized> totalVM=<n>i,activeVM=<n>i,..."
        holding comma separated counts of VMs per state.
    """

    # connect to an OpenStack cloud
    conn = connect(cloud=cloud_name)

    # dict values are evaluated top to bottom, so the queries run in the
    # order: total, active, build, error, shutoff
    counts = {
        "total": number_servers_total(conn),
        "active": number_servers_active(conn),
        "build": number_servers_build(conn),
        "error": number_servers_error(conn),
        "shutoff": number_servers_shutoff(conn),
    }

    return (
        f"VMStats,instance={cloud_name.capitalize()} "
        f"totalVM={counts['total']}i,activeVM={counts['active']}i,"
        f"buildVM={counts['build']}i,errorVM={counts['error']}i,"
        f"shutoffVM={counts['shutoff']}i"
    )


def main(user_args: List):
    """
    Main method to collect server statuses for an influxDB instance
    :param user_args: Command line arguments, passed to parse_args along with
        the script description
    """
    # parse_args runs first; its result and the stats-collecting function
    # are then handed to run_scrape
    run_scrape(
        parse_args(user_args, description="Get All VM Statuses"),
        get_all_server_statuses,
    )


if __name__ == "__main__":
    # forward only the user-supplied arguments, dropping the script name
    cli_args = sys.argv[1:]
    main(cli_args)
Loading