Skip to content

Commit

Permalink
test: add a small set of ops.testing benchmark tests (#1504)
Browse files Browse the repository at this point in the history
The PR:
 * Adds a new `tox -e benchmark` command.
 * Adds a new `test/test_benchmark.py` module.
 * Excludes the benchmark tests from `tox -e unit` and `tox -e coverage`

I wrote a small set of benchmark tests while working on #1434. It seems
like it'd be worth keeping these - in the future, they could be expanded
to include a small set of benchmark tests that target ops more
specifically, and ideally we could have a CI workflow that failed if
there were regressions (currently, running the tests just outputs the
timing, and you can use the pytest-benchmark tools to do comparisons).
  • Loading branch information
tonyandrewmeyer authored Jan 22, 2025
1 parent f12df01 commit caa6a07
Show file tree
Hide file tree
Showing 8 changed files with 337 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ test/charms/test_smoke/.charmcraft_output_packages.txt
test/charms/test_smoke/requirements.txt
test/charms/test_smoke/charmcraft.yaml
juju-crashdump*.tar.xz

# Benchmark test artifacts
.benchmarks
24 changes: 24 additions & 0 deletions test/benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Benchmark tests for ops.
Optimising performance is not a current goal with ops - any gains are
unlikely to be significant compared with ones from Juju or the charm and
its workload. However, we do want to ensure that we do not unknowingly
regress in performance.
This package is for tests that cover core functionality, to be used for
performance benchmarking.
"""
22 changes: 22 additions & 0 deletions testing/tests/benchmark/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Copyright 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Benchmark tests for ops-scenario.
Optimising performance is not a current goal with ops-scenario. However,
we do want to ensure that we do not unknowingly regress in performance.
This package contains a small set of tests that cover core functionality,
to be used for performance benchmarking.
"""
40 changes: 40 additions & 0 deletions testing/tests/benchmark/charms/benchmark_charm/charmcraft.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Metadata for the charm used by the ops-scenario benchmark tests.
# The container, resource, storage, relation, peer and binding names declared
# here are the ones the benchmark tests construct state for.
name: benchmark
type: charm
title: ops-benchmark
summary: A simple charm used for benchmark tests
description: Read the summary.
bases:
  - build-on:
      - name: ubuntu
        channel: "22.04"
    run-on:
      - name: ubuntu
        channel: "22.04"
config:
  options:
    log-level:
      description: Configures the log level.
      default: "info"
      type: string
actions:
  act:
    description: Do something to the workload.
containers:
  foo:
resources:
  baz:
    type: oci-image
storage:
  bar:
    type: filesystem
requires:
  rel:
    interface: qux
peers:
  peer:
    interface: chat
extra-bindings:
  MySpace: null
parts:
  charm:
    # The charm module is named benchmark_charm (the tests import it under
    # that name from the src directory), so the entrypoint must match.
    charm-entrypoint: src/benchmark_charm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ops ~= 2.17
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

"""Basic benchmarking charm.
Note that this module is named benchmark_charm, rather than the more typical
charm, to avoid conflicts with ops.charm in the testing runs.
"""

import logging

import ops

# Name the logger after this module. Passing the quoted string "__name__"
# would create a logger literally called "__name__" instead.
logger = logging.getLogger(__name__)


class BenchmarkCharm(ops.CharmBase):
    """Charm used by the benchmark tests.

    It observes update-status, stop and config-changed, and declares a
    stored-state attribute.
    """

    _stored = ops.StoredState()

    def __init__(self, framework: ops.Framework):
        super().__init__(framework)
        # (event, handler) pairs, registered in this order.
        observers = (
            (self.on.update_status, self._on_update_status),
            (self.on.stop, self._on_stop),
            (self.on.config_changed, self._on_config_changed),
        )
        for bound_event, handler in observers:
            framework.observe(bound_event, handler)

    def _on_update_status(self, _: ops.UpdateStatusEvent):
        """Log 50 messages at each of four log levels."""
        for level in ("debug", "info", "warning", "error"):
            log = getattr(logger, level)
            for number in range(50):
                log("This is message %s", number)

    def _on_stop(self, _: ops.StopEvent):
        """Deliberately empty: benchmarks the cost of having an observer."""

    def _on_config_changed(self, event: ops.ConfigChangedEvent):
        """Defer every config-changed event."""
        event.defer()


if __name__ == "__main__": # pragma: nocover
ops.main(BenchmarkCharm)
178 changes: 178 additions & 0 deletions testing/tests/benchmark/test_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Copyright 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Benchmark tests for ops-scenario."""

import dataclasses
import pathlib
import sys

import pytest

import ops
from ops import testing


@pytest.fixture
def benchmark_charm():
    """Yield the ``BenchmarkCharm`` class from the benchmark charm's ``src`` directory.

    The charm's ``src`` directory is appended to ``sys.path`` for the
    duration of the fixture so that ``benchmark_charm`` can be imported, and
    is removed again afterwards.
    """
    charm_src = pathlib.Path(__file__).parent / "charms" / "benchmark_charm" / "src"
    charm_path = str(charm_src)
    sys.path.append(charm_path)
    # try/finally ensures sys.path is restored even if the import itself
    # fails, not only after a test that used the fixture has finished.
    try:
        from benchmark_charm import BenchmarkCharm

        yield BenchmarkCharm
    finally:
        sys.path.remove(charm_path)


# Note: the 'benchmark' argument used by the tests in this module is a
# fixture that pytest-benchmark automatically makes available to all tests.
def test_context_explicit_meta(benchmark):
    """Time creating a Context when the metadata is passed in explicitly."""
    metadata = {"name": "foo"}
    created = benchmark(testing.Context, ops.CharmBase, meta=metadata)
    assert isinstance(created, testing.Context)


def test_run_no_observer(benchmark, benchmark_charm):
    """Time running a start event through the benchmark charm."""
    ctx = testing.Context(benchmark_charm)
    start_event = ctx.on.start()
    benchmark(ctx.run, start_event, testing.State())
    # The timing is what matters in all of these tests, but we also check
    # that an event was emitted (and that only one kind of event was
    # emitted across all of the benchmark repetitions), so that we know
    # the run actually did something.
    emitted_kinds = {emitted.handle.kind for emitted in ctx.emitted_events}
    assert len(emitted_kinds) == 1


def test_run_observed(benchmark, benchmark_charm):
    """Time running a stop event through the benchmark charm."""
    ctx = testing.Context(benchmark_charm)
    stop_event = ctx.on.stop()
    benchmark(ctx.run, stop_event, testing.State())
    emitted_kinds = {emitted.handle.kind for emitted in ctx.emitted_events}
    assert len(emitted_kinds) == 1


def test_context_explicit_meta_config_actions(benchmark):
    """Time creating a Context from explicit metadata, actions and config.

    Only the Context construction is benchmarked; the single action run
    afterwards checks that the resulting Context is usable.
    """
    action_spec = {"act": {"description": "foo"}}
    config_spec = {"options": {"conf": {"type": "int", "description": "bar"}}}
    ctx = benchmark(
        testing.Context,
        ops.CharmBase,
        meta={"name": "foo"},
        actions=action_spec,
        config=config_spec,
    )
    initial_state = testing.State(config={"conf": 10})
    ctx.run(ctx.on.action("act"), initial_state)
    emitted_kinds = {emitted.handle.kind for emitted in ctx.emitted_events}
    assert len(emitted_kinds) == 1


def test_context_autoload_meta(benchmark, benchmark_charm):
    """Time creating a Context when no metadata is passed explicitly."""
    created = benchmark(testing.Context, benchmark_charm)
    assert isinstance(created, testing.Context)


def test_many_tests_explicit_meta(benchmark):
    """Time several consecutive runs against one explicitly-configured Context."""
    # Calling benchmark(ctx.run, ...) benchmarks a test that looks like:
    #     def test_x():
    #         ...
    #         ctx.run(ctx.on...)
    #         assert ...
    # Here we instead benchmark a test that looks like:
    #     def test_x():
    #         ...
    #         state = testing.State(...)
    #         state = ctx.run(ctx.on..., state)  # Event 1
    #         assert ...
    #         state = ctx.run(ctx.on..., state)  # Event 2
    #         assert ...
    # We generally recommend using a fresh context and a single run in each
    # test function. However, there are times when it makes sense to
    # simulate multiple events in a row, and we want to make sure that we
    # are timing tests that take this form as well, where the cost of
    # creating the Context object is amortised across many runs.

    def run_event_sequence():
        """Simulate running multiple tests against the same charm."""
        ctx = testing.Context(ops.CharmBase, meta={"name": "foo"})
        state = testing.State()
        for event_name in ("install", "start", "stop", "remove"):
            make_event = getattr(ctx.on, event_name)
            state = ctx.run(make_event(), state)

    benchmark(run_event_sequence)


def test_many_tests_autoload_meta(benchmark, benchmark_charm):
    """Time several consecutive runs against one Context for the benchmark charm."""

    def run_event_sequence():
        """Simulate running multiple tests against the same charm."""
        ctx = testing.Context(benchmark_charm)
        state = testing.State()
        for event_name in ("install", "start", "stop", "remove"):
            make_event = getattr(ctx.on, event_name)
            state = ctx.run(make_event(), state)

    benchmark(run_event_sequence)


def test_lots_of_logs(benchmark, benchmark_charm):
    """Time an update-status run, the event that drives the charm's logging handler."""
    ctx = testing.Context(benchmark_charm)
    update_status = ctx.on.update_status()
    benchmark(ctx.run, update_status, testing.State())
    logged_count = len(ctx.juju_log)
    assert logged_count > 200


def test_full_state(benchmark, benchmark_charm):
    """Time building a State that populates many of its components.

    Only the State construction is benchmarked. The state is then run
    through a start event to check that it comes out unchanged (stored
    state is compared separately).
    """

    def fill_state():
        """Build a State with relations, networks, containers, storage, etc."""
        return testing.State(
            relations={testing.Relation("rel"), testing.PeerRelation("peer")},
            networks={testing.Network("MySpace")},
            containers={testing.Container("foo")},
            storages={testing.Storage("bar")},
            opened_ports={
                testing.TCPPort(22),
                testing.ICMPPort(),
                testing.UDPPort(8000),
            },
            secrets={testing.Secret({"password": "admin"})},
            resources={testing.Resource(name="baz", path=".")},
            # NOTE(review): "BenchMarkCharm" differs in case from the charm
            # class name (BenchmarkCharm) - confirm whether that is intended.
            stored_states={testing.StoredState(owner_path="BenchMarkCharm")},
            app_status=testing.ActiveStatus(),
            unit_status=testing.BlockedStatus("I'm stuck!"),
        )

    ctx = testing.Context(benchmark_charm)
    state_in = benchmark(fill_state)
    state_out = ctx.run(ctx.on.start(), state_in)
    # stored_states is complicated: the output will contain a stored state
    # that the framework itself added (counting the number of events), so
    # the input and output states don't naively match. We pull stored_states
    # out of both sides and compare it separately.
    fields_in = dataclasses.asdict(state_in)
    fields_out = dataclasses.asdict(state_out)
    stored_in = fields_in.pop("stored_states")
    stored_out = fields_out.pop("stored_states")
    # An owner_path of None means that it's owned by the framework.
    not_framework_owned = {ss for ss in stored_out if ss.owner_path is not None}
    assert stored_in == not_framework_owned
    assert fields_in == fields_out


def test_deferred_events(benchmark, benchmark_charm):
    """Time a config-changed run that starts with a deferred stop event queued."""
    ctx = testing.Context(benchmark_charm, capture_deferred_events=True)
    pending_stop = ctx.on.stop().deferred(benchmark_charm._on_stop)
    initial_state = testing.State(deferred=[pending_stop])
    final_state = benchmark(ctx.run, ctx.on.config_changed(), initial_state)
    assert len(final_state.deferred) == 1
    emitted_kinds = {emitted.handle.kind for emitted in ctx.emitted_events}
    assert len(emitted_kinds) == 2
29 changes: 26 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ envlist = lint, static, unit
src_path = ops/
tst_path = test/
all_path = {[vars]src_path} {[vars]tst_path}
testing_src_path = testing/src/scenario/
testing_tst_path = testing/tests/

[testenv]
basepython = python3
Expand Down Expand Up @@ -104,7 +106,9 @@ deps =
-e .
-e testing
commands =
pytest -n auto --ignore={[vars]tst_path}smoke -v --tb native \
pytest -n auto --ignore={[vars]tst_path}smoke \
--ignore={[vars]tst_path}benchmark --ignore={[vars]testing_tst_path}benchmark \
-v --tb native \
-W 'ignore:Harness is deprecated:PendingDeprecationWarning' {posargs}

[testenv:coverage]
Expand All @@ -124,11 +128,30 @@ deps =
-e testing
commands =
mkdir -p .report
coverage run --source={[vars]src_path},testing/src/scenario \
-m pytest --ignore={[vars]tst_path}smoke -v --tb native {posargs}
coverage run --source={[vars]src_path},{[vars]testing_src_path} \
-m pytest --ignore={[vars]tst_path}smoke \
--ignore={[vars]tst_path}benchmark --ignore={[vars]testing_tst_path}benchmark \
-v --tb native \
-W 'ignore:Harness is deprecated:PendingDeprecationWarning' {posargs}
coverage xml -o .report/coverage.xml
coverage report

[testenv:benchmark]
description = Run benchmark tests
passenv =
RUN_REAL_PEBBLE_TESTS
PEBBLE
deps =
PyYAML==6.*
websocket-client==1.*
pytest~=7.2
pytest-benchmark~=5.0
typing_extensions~=4.2
-e .
-e testing
commands =
pytest -v --tb native {[vars]tst_path}benchmark {[vars]testing_tst_path}benchmark {posargs}

[testenv:pebble]
description = Run real pebble tests
allowlist_externals = pebble
Expand Down

0 comments on commit caa6a07

Please sign in to comment.