Skip to content

Commit

Permalink
Merge pull request #131 from stfc/update-service-to-influx
Browse files Browse the repository at this point in the history
BUG: fix various bugs
  • Loading branch information
khalford authored Feb 16, 2024
2 parents 8d1d32f + 7f8c138 commit afdd4d2
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 61 deletions.
140 changes: 92 additions & 48 deletions MonitoringTools/tests/test_service_status_to_influx.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,29 @@ def test_get_hypervisor_properties_state_up():
"""
mock_hv = {
"state": "up",
"memory_size": 1,
"memory_used": 2,
"memory_free": 3,
"memory_size": 2,
"memory_used": 1,
"vcpus_used": 4,
"vcpus": 5,
}
expected_result = {
"hv": {
"aggregate": "no-aggregate",
"memorymax": 1,
"memoryused": 2,
"memoryavailable": 3,
"memorymax": 2,
"memoryused": 1,
"memoryavailable": 1,
"memperc": 50,
"cpuused": 4,
"cpumax": 5,
"cpuavailable": 1,
"cpuperc": 80,
"agent": 1,
"state": 1,
"statetext": "Up",
"utilperc": 80,
"cpufull": 0,
"memfull": 1,
"full": 1,
}
}
assert get_hypervisor_properties(mock_hv) == expected_result
Expand All @@ -52,24 +57,29 @@ def test_get_hypervisor_properties_state_down():
"""
mock_hv = {
"state": "down",
"memory_size": 1,
"memory_used": 2,
"memory_free": 3,
"memory_size": 2,
"memory_used": 1,
"vcpus_used": 4,
"vcpus": 5,
}
expected_result = {
"hv": {
"aggregate": "no-aggregate",
"memorymax": 1,
"memoryused": 2,
"memoryavailable": 3,
"memorymax": 2,
"memoryused": 1,
"memoryavailable": 1,
"memperc": 50,
"cpuused": 4,
"cpumax": 5,
"cpuavailable": 1,
"cpuperc": 80,
"agent": 1,
"state": 0,
"statetext": "Down",
"utilperc": 80,
"cpufull": 0,
"memfull": 1,
"full": 1,
}
}
assert get_hypervisor_properties(mock_hv) == expected_result
Expand Down Expand Up @@ -170,16 +180,24 @@ def test_convert_to_data_string_one_hv_one_service(mock_get_service_prop_string)
Tests convert_to_data_string works with single entry in details
"""
mock_instance = "prod"
mock_service_details = NonCallableMock()
mock_service_details = {
"aggregate": "ag1",
"statetext": "Up",
"statustext": "Enabled",
"prop1": "val1",
}
mock_details = {"hv1": {"service1": mock_service_details}}

mock_get_service_prop_string.return_value = "prop1=val1"

res = convert_to_data_string(mock_instance, mock_details)
assert (
res == 'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n'
res ==
'ServiceStatus,host="hv1",service="service1",instance=Prod,'
'statetext="Up",statustext="Enabled",aggregate="ag1"'
' prop1=val1\n'
)
mock_get_service_prop_string.assert_called_once_with(mock_service_details)
mock_get_service_prop_string.assert_called_once_with({"prop1": "val1"})


@patch("service_status_to_influx.get_service_prop_string")
Expand All @@ -188,8 +206,18 @@ def test_convert_to_data_string_one_hv_multi_service(mock_get_service_prop_strin
Tests convert_to_data_string works with single entry in details with multiple service binaries
"""
mock_instance = "prod"
mock_service_details_1 = NonCallableMock()
mock_service_details_2 = NonCallableMock()
mock_service_details_1 = {
"aggregate": "ag1",
"statetext": "Up",
"statustext": "Enabled",
"prop1": "val1",
}
mock_service_details_2 = {
"aggregate": "ag2",
"statetext": "Down",
"statustext": "Disabled",
"prop1": "val2",
}
mock_details = {
"hv1": {"service1": mock_service_details_1, "service2": mock_service_details_2}
}
Expand All @@ -198,11 +226,15 @@ def test_convert_to_data_string_one_hv_multi_service(mock_get_service_prop_strin

res = convert_to_data_string(mock_instance, mock_details)
assert res == (
'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n'
'ServiceStatus,host="hv1",service="service2",instance=Prod prop1=val2\n'
'ServiceStatus,host="hv1",service="service1",instance=Prod,'
'statetext="Up",statustext="Enabled",aggregate="ag1" '
'prop1=val1\n'
'ServiceStatus,host="hv1",service="service2",instance=Prod,'
'statetext="Down",statustext="Disabled",aggregate="ag2" '
'prop1=val2\n'
)
mock_get_service_prop_string.assert_has_calls(
[call(mock_service_details_1), call(mock_service_details_2)]
[call({"prop1": "val1"}), call({"prop1": "val2"})]
)


Expand All @@ -212,9 +244,25 @@ def test_convert_to_data_string_multi_item(mock_get_service_prop_string):
Tests convert_to_data_string works with multiple entries in dict for details
"""
mock_instance = "prod"
mock_service_details_1 = NonCallableMock()
mock_service_details_2 = NonCallableMock()
mock_service_details_3 = NonCallableMock()
mock_service_details_1 = {
"aggregate": "ag1",
"statetext": "Up",
"statustext": "Enabled",
"prop1": "val1",
}
mock_service_details_2 = {
"aggregate": "ag2",
"statetext": "Down",
"statustext": "Disabled",
"prop1": "val2",
}
mock_service_details_3 = {
"aggregate": "ag3",
"statetext": "Up",
"statustext": "Disabled",
"prop1": "val3",
}

mock_details = {
"hv1": {
"service1": mock_service_details_1,
Expand All @@ -231,15 +279,21 @@ def test_convert_to_data_string_multi_item(mock_get_service_prop_string):

res = convert_to_data_string(mock_instance, mock_details)
assert res == (
'ServiceStatus,host="hv1",service="service1",instance=Prod prop1=val1\n'
'ServiceStatus,host="hv1",service="service2",instance=Prod prop1=val2\n'
'ServiceStatus,host="hv2",service="service3",instance=Prod prop1=val3\n'
'ServiceStatus,host="hv1",service="service1",instance=Prod,'
'statetext="Up",statustext="Enabled",aggregate="ag1" '
'prop1=val1\n'
'ServiceStatus,host="hv1",service="service2",instance=Prod,'
'statetext="Down",statustext="Disabled",aggregate="ag2" '
'prop1=val2\n'
'ServiceStatus,host="hv2",service="service3",instance=Prod,'
'statetext="Up",statustext="Disabled",aggregate="ag3" '
'prop1=val3\n'
)
mock_get_service_prop_string.assert_has_calls(
[
call(mock_service_details_1),
call(mock_service_details_2),
call(mock_service_details_3),
call({"prop1": "val1"}),
call({"prop1": "val2"}),
call({"prop1": "val3"}),
]
)

Expand All @@ -251,20 +305,10 @@ def test_get_service_prop_string_empty_dict():
assert get_service_prop_string({}) == ""


def test_get_service_prop_string_with_string_props():
    """
    Checks the prop string get_service_prop_string builds for text-valued props:
    every value is expected to be double-quoted and to carry no trailing i
    """
    text_props = {"statetext": "foo", "statustext": "bar", "aggregate": "baz"}
    prop_string = get_service_prop_string(text_props)
    assert prop_string == 'statetext="foo",statustext="bar",aggregate="baz"'


def test_get_service_prop_string_with_int_props():
def test_get_service_prop_string():
"""
tests get_service_prop_string returns correct prop string
when given int props it should suffix each property value with i
it should suffix each property value with i
"""
props = {"prop1": 1, "prop2": 2, "prop3": 3}
expected_result = "prop1=1i,prop2=2i,prop3=3i"
Expand Down Expand Up @@ -328,10 +372,10 @@ def test_update_with_service_statuses(mock_get_service_properties):

# stubs out actually getting properties
mock_get_service_properties.side_effect = [
{"nova-compute": {"status": "enabled"}},
{"nova-compute": {"status": 1, "statustext": "enabled"}},
{"other-service": {}},
{"other-service": {"status": "enabled"}},
{"nova-compute": {"status": "disabled"}},
{"other-service": {"status": 1, "statustext": "enabled"}},
{"nova-compute": {"status": 0, "statustext": "disabled"}},
]

res = update_with_service_statuses(mock_conn, mock_status_details)
Expand All @@ -342,16 +386,16 @@ def test_update_with_service_statuses(mock_get_service_properties):
# shouldn't override what's already there
# add hv status == nova-compute svc status
"hv1": {
"hv": {"status": "enabled"},
"nova-compute": {"status": "enabled"},
"hv": {"status": 1, "statustext": "enabled"},
"nova-compute": {"status": 1, "statustext": "enabled"},
"foo": {},
"bar": {},
"other-service": {},
},
# only nova-compute status adds hv status
"hv2": {"hv": {}, "other-service": {"status": "enabled"}},
"hv2": {"hv": {}, "other-service": {"status": 1, "statustext": "enabled"}},
# adds what doesn't exist, no "hv" so no setting status
"hv3": {"nova-compute": {"status": "disabled"}},
"hv3": {"nova-compute": {"status": 0, "statustext": "disabled"}},
}


Expand Down
24 changes: 24 additions & 0 deletions MonitoringTools/tests/test_slottifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,30 @@ def test_calculate_slots_on_hv_gpu_available_max():
assert res.max_gpu_slots_capacity_enabled == 5


def test_calculate_slots_on_hv_gpu_max_slots_calculated_properly():
    """
    Checks that calculate_slots_on_hv works out the maximum slot counts
    properly when given a gpu flavor
    """
    # requirements of a single slot of the flavor under test
    flavor_requirements = {
        "gpus_required": 2,
        "cores_required": 10,
        "mem_required": 10,
    }
    hypervisor_info = {
        "compute_service_status": "enabled",
        # 2 gpus are needed per slot, so 6 gpus should yield 3 slots
        "gpu_capacity": 6,
        "cores_available": 100,
        "mem_available": 100,
        "core_capacity": 100,
        "mem_capacity": 100,
    }

    # "g-flavor1" specifies a gpu flavor
    slots = calculate_slots_on_hv("g-flavor1", flavor_requirements, hypervisor_info)

    assert slots.slots_available == 3
    assert slots.max_gpu_slots_capacity == 3
    assert slots.estimated_gpu_slots_used == 0
    assert slots.max_gpu_slots_capacity_enabled == 3


def test_calculate_slots_on_hv_calculates_used_gpu_capacity():
"""
tests calculate_slots_on_hv calculates slots properly for gpu flavor
Expand Down
40 changes: 31 additions & 9 deletions MonitoringTools/usr/local/bin/service_status_to_influx.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,24 @@ def get_hypervisor_properties(hypervisor: Hypervisor) -> Dict:
"aggregate": "no-aggregate",
"memorymax": hypervisor["memory_size"],
"memoryused": hypervisor["memory_used"],
"memoryavailable": hypervisor["memory_free"],
"memoryavailable": hypervisor["memory_size"] - hypervisor["memory_used"],
"memperc": round((hypervisor["memory_used"] / hypervisor["memory_size"]) * 100),
"cpumax": hypervisor["vcpus"],
"cpuused": hypervisor["vcpus_used"],
"cpuavailable": hypervisor["vcpus"] - hypervisor["vcpus_used"],
"cpuperc": round((hypervisor["vcpus_used"] / hypervisor["vcpus"]) * 100),
"agent": 1,
"state": 1 if hypervisor["state"] == "up" else 0,
"statetext": hypervisor["state"].capitalize(),
}
}
hv_info = hv_prop_dict["hv"]

hv_info["utilperc"] = max(hv_info["cpuperc"], hv_info["memperc"])
hv_info["cpufull"] = 1 if hv_info["cpuperc"] >= 97 else 0
hv_info["memfull"] = 1 if hv_info["memoryavailable"] <= 8192 else 0
hv_info["full"] = int(hv_info["memfull"] or hv_info["cpufull"])

return hv_prop_dict


Expand Down Expand Up @@ -80,11 +89,24 @@ def convert_to_data_string(instance: str, service_details: Dict) -> str:
data_string = ""
for hypervisor_name, services in service_details.items():
for service_binary, service_stats in services.items():
data_string += (
f'ServiceStatus,host="{hypervisor_name}",'
f'service="{service_binary}",instance={instance.capitalize()} '
f"{get_service_prop_string(service_stats)}\n"
statustext = service_stats.pop("statustext")
statetext = service_stats.pop("statetext")
new_data_string = (
f'ServiceStatus'
f',host="{hypervisor_name}"'
f',service="{service_binary}"'
f',instance={instance.capitalize()}'
f',statetext="{statetext}"'
f',statustext="{statustext}"'
)

aggregate = service_stats.pop("aggregate", None)
if aggregate:
new_data_string += f',aggregate="{aggregate}"'

new_data_string += f" {get_service_prop_string(service_stats)}\n"
data_string += new_data_string

return data_string


Expand All @@ -97,10 +119,7 @@ def get_service_prop_string(service_dict: Dict) -> str:
"""
stats_strings = []
for stat, val in service_dict.items():
stats_string = f'{stat}="{val}"'
if stat not in ["statetext", "statustext", "aggregate"]:
stats_string = f"{stat}={val}i"
stats_strings.append(stats_string)
stats_strings.append(f"{stat}={val}i")
return ",".join(stats_strings)


Expand Down Expand Up @@ -137,6 +156,9 @@ def update_with_service_statuses(conn, status_details: Dict) -> Dict:
service_host.update(get_service_properties(service))
if "hv" in service_host and service["binary"] == "nova-compute":
service_host["hv"]["status"] = service_host["nova-compute"]["status"]
service_host["hv"]["statustext"] = service_host["nova-compute"][
"statustext"
]

return status_details

Expand Down
12 changes: 8 additions & 4 deletions MonitoringTools/usr/local/bin/slottifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,10 @@ def calculate_slots_on_hv(
f"gpu flavor {flavor_name} does not have 'gpunum' metadata"
)

theoretical_gpu_slots_available = (
hv_info["gpu_capacity"] // flavor_reqs["gpus_required"]
theoretical_gpu_slots_available = min(
hv_info["gpu_capacity"] // flavor_reqs["gpus_required"],
hv_info["core_capacity"] // flavor_reqs["cores_required"],
hv_info["mem_capacity"] // flavor_reqs["mem_required"],
)

estimated_slots_used = (
Expand All @@ -173,10 +175,12 @@ def calculate_slots_on_hv(
theoretical_gpu_slots_available, estimated_slots_used
)

slots_dataclass.max_gpu_slots_capacity = hv_info["gpu_capacity"]
slots_dataclass.max_gpu_slots_capacity = theoretical_gpu_slots_available

if hv_info["compute_service_status"] == "enabled":
slots_dataclass.max_gpu_slots_capacity_enabled = hv_info["gpu_capacity"]
slots_dataclass.max_gpu_slots_capacity_enabled = (
theoretical_gpu_slots_available
)

slots_available = min(
slots_available,
Expand Down

0 comments on commit afdd4d2

Please sign in to comment.