Skip to content

Commit

Permalink
chore(ci_visibility): add quarantine support to pytest (#11615)
Browse files Browse the repository at this point in the history
This PR adds preliminary support for quarantining tests in pytest.

The API to query which tests are quarantined does not exist yet on the
backend side, and the final form of that API is still to be defined, so
the code dealing with the API has been moved to a separate PR
(#11770). Currently, we can
mark tests as quarantined by manually adding the
`test.quarantine.is_quarantined` tag to the test with a pytest
decorator:

```
@pytest.mark.dd_tags(**{"test.quarantine.is_quarantined": True})
def test_fail_quarantined():
    assert False
```

For testing purposes, the environment variables
`_DD_TEST_FORCE_ENABLE_QUARANTINE` and `_DD_TEST_FORCE_ENABLE_ATR` have
been added to enable quarantine and ATR without depending on the
backend.

The test reporting looks like below. Errors and logs for quarantined
tests are not printed.

![image](https://github.com/user-attachments/assets/f070323d-edef-431e-a7a4-a6d119348876)



## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
vitor-de-araujo authored Dec 20, 2024
1 parent 20b2b03 commit 3f64666
Show file tree
Hide file tree
Showing 19 changed files with 626 additions and 89 deletions.
117 changes: 88 additions & 29 deletions ddtrace/contrib/pytest/_atr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,41 +29,64 @@ class _ATR_RETRY_OUTCOMES:
ATR_FINAL_FAILED = "dd_atr_final_failed"


class _QUARANTINE_ATR_RETRY_OUTCOMES(_ATR_RETRY_OUTCOMES):
ATR_ATTEMPT_PASSED = "dd_quarantine_atr_attempt_passed"
ATR_ATTEMPT_FAILED = "dd_quarantine_atr_attempt_failed"
ATR_ATTEMPT_SKIPPED = "dd_quarantine_atr_attempt_skipped"
ATR_FINAL_PASSED = "dd_quarantine_atr_final_passed"
ATR_FINAL_FAILED = "dd_quarantine_atr_final_failed"


_FINAL_OUTCOMES: t.Dict[TestStatus, str] = {
TestStatus.PASS: _ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
TestStatus.FAIL: _ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
}


_QUARANTINE_FINAL_OUTCOMES: t.Dict[TestStatus, str] = {
TestStatus.PASS: _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
TestStatus.FAIL: _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
}


def atr_handle_retries(
test_id: InternalTestId,
item: pytest.Item,
when: str,
original_result: pytest_TestReport,
test_outcome: _TestOutcome,
is_quarantined: bool = False,
):
if is_quarantined:
retry_outcomes = _QUARANTINE_ATR_RETRY_OUTCOMES
final_outcomes = _QUARANTINE_FINAL_OUTCOMES
else:
retry_outcomes = _ATR_RETRY_OUTCOMES
final_outcomes = _FINAL_OUTCOMES

outcomes = RetryOutcomes(
PASSED=retry_outcomes.ATR_ATTEMPT_PASSED,
FAILED=retry_outcomes.ATR_ATTEMPT_FAILED,
SKIPPED=retry_outcomes.ATR_ATTEMPT_SKIPPED,
XFAIL=retry_outcomes.ATR_ATTEMPT_PASSED,
XPASS=retry_outcomes.ATR_ATTEMPT_FAILED,
)

# Overwrite the original result to avoid double-counting when displaying totals in final summary
if when == "call":
if test_outcome.status == TestStatus.FAIL:
original_result.outcome = _ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED
return
if InternalTest.get_tag(test_id, "_dd.ci.atr_setup_failed"):
log.debug("Test item %s failed during setup, will not be retried for Early Flake Detection")
return
if InternalTest.get_tag(test_id, "_dd.ci.atr_teardown_failed"):
# NOTE: tests that passed their call but failed during teardown are not retried
log.debug("Test item %s failed during teardown, will not be retried for Early Flake Detection")
original_result.outcome = outcomes.FAILED
return

atr_outcome = _atr_do_retries(item)
atr_outcome = _atr_do_retries(item, outcomes)

final_report = pytest_TestReport(
nodeid=item.nodeid,
location=item.location,
keywords=item.keywords,
when="call",
longrepr=None,
outcome=_FINAL_OUTCOMES[atr_outcome],
outcome=final_outcomes[atr_outcome],
)
item.ihook.pytest_runtest_logreport(report=final_report)

Expand All @@ -72,15 +95,7 @@ def atr_get_failed_reports(terminalreporter: _pytest.terminal.TerminalReporter)
return terminalreporter.getreports(_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED)


def _atr_do_retries(item: pytest.Item) -> TestStatus:
outcomes = RetryOutcomes(
PASSED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
FAILED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
SKIPPED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED,
XFAIL=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
XPASS=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
)

def _atr_do_retries(item: pytest.Item, outcomes: RetryOutcomes) -> TestStatus:
test_id = _get_test_id_from_item(item)

while InternalTest.atr_should_retry(test_id):
Expand Down Expand Up @@ -160,21 +175,21 @@ def atr_pytest_terminal_summary_post_yield(terminalreporter: _pytest.terminal.Te

_atr_write_report_for_status(
terminalreporter,
_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
"failed",
PYTEST_STATUS.FAILED,
raw_summary_strings,
markedup_summary_strings,
status_key=_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
status_text="failed",
report_outcome=PYTEST_STATUS.FAILED,
raw_strings=raw_summary_strings,
markedup_strings=markedup_summary_strings,
color="red",
)

_atr_write_report_for_status(
terminalreporter,
_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
"passed",
PYTEST_STATUS.PASSED,
raw_summary_strings,
markedup_summary_strings,
status_key=_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
status_text="passed",
report_outcome=PYTEST_STATUS.PASSED,
raw_strings=raw_summary_strings,
markedup_strings=markedup_summary_strings,
color="green",
)

Expand Down Expand Up @@ -268,3 +283,47 @@ def atr_get_teststatus(report: pytest_TestReport) -> _pytest_report_teststatus_r
if report.outcome == _ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED:
return (_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED, "F", ("ATR FINAL STATUS: FAILED", {"red": True}))
return None


def quarantine_atr_get_teststatus(report: pytest_TestReport) -> _pytest_report_teststatus_return_type:
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
"q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}PASSED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
"Q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}FAILED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED,
"q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}SKIPPED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
".",
("QUARANTINED FINAL STATUS: PASSED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
"F",
("QUARANTINED FINAL STATUS: FAILED", {"blue": True}),
)
return None


def quarantine_pytest_terminal_summary_post_yield(terminalreporter: _pytest.terminal.TerminalReporter):
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED, [])
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED, [])

# TODO: report list of fully failed quarantined tests, possibly inside the ATR report.
35 changes: 32 additions & 3 deletions ddtrace/contrib/pytest/_plugin_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,15 @@
from ddtrace.contrib.pytest._atr_utils import atr_get_teststatus
from ddtrace.contrib.pytest._atr_utils import atr_handle_retries
from ddtrace.contrib.pytest._atr_utils import atr_pytest_terminal_summary_post_yield
from ddtrace.contrib.pytest._atr_utils import quarantine_atr_get_teststatus
from ddtrace.contrib.pytest._atr_utils import quarantine_pytest_terminal_summary_post_yield

log = get_logger(__name__)


_NODEID_REGEX = re.compile("^((?P<module>.*)/(?P<suite>[^/]*?))::(?P<name>.*?)$")
USER_PROPERTY_QUARANTINED = "dd_quarantined"
OUTCOME_QUARANTINED = "quarantined"


def _handle_itr_should_skip(item, test_id) -> bool:
Expand Down Expand Up @@ -327,6 +331,11 @@ def _pytest_runtest_protocol_pre_yield(item) -> t.Optional[ModuleCodeCollector.C

collect_test_coverage = InternalTestSession.should_collect_coverage() and not item_will_skip

is_quarantined = InternalTest.is_quarantined_test(test_id)
if is_quarantined:
# We add this information to user_properties to have it available in pytest_runtest_makereport().
item.user_properties += [(USER_PROPERTY_QUARANTINED, True)]

if collect_test_coverage:
return _start_collecting_coverage()

Expand Down Expand Up @@ -457,6 +466,8 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome

test_id = _get_test_id_from_item(item)

is_quarantined = InternalTest.is_quarantined_test(test_id)

test_outcome = _process_result(item, call, original_result)

# A None value for test_outcome.status implies the test has not finished yet
Expand All @@ -472,6 +483,11 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
if not InternalTest.is_finished(test_id):
InternalTest.finish(test_id, test_outcome.status, test_outcome.skip_reason, test_outcome.exc_info)

if original_result.failed and is_quarantined:
# Ensure test doesn't count as failed for pytest's exit status logic
# (see <https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/main.py#L654>).
original_result.outcome = OUTCOME_QUARANTINED

# ATR and EFD retry tests only if their teardown succeeded to ensure the best chance the retry will succeed
# NOTE: this mutates the original result's outcome
if InternalTest.stash_get(test_id, "setup_failed") or InternalTest.stash_get(test_id, "teardown_failed"):
Expand All @@ -480,7 +496,7 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
if InternalTestSession.efd_enabled() and InternalTest.efd_should_retry(test_id):
return efd_handle_retries(test_id, item, call.when, original_result, test_outcome)
if InternalTestSession.atr_is_enabled() and InternalTest.atr_should_retry(test_id):
return atr_handle_retries(test_id, item, call.when, original_result, test_outcome)
return atr_handle_retries(test_id, item, call.when, original_result, test_outcome, is_quarantined)


@pytest.hookimpl(hookwrapper=True)
Expand Down Expand Up @@ -538,6 +554,9 @@ def _pytest_terminal_summary_post_yield(terminalreporter, failed_reports_initial

if _pytest_version_supports_atr() and InternalTestSession.atr_is_enabled():
atr_pytest_terminal_summary_post_yield(terminalreporter)

quarantine_pytest_terminal_summary_post_yield(terminalreporter)

return


Expand Down Expand Up @@ -577,7 +596,7 @@ def _pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:

if InternalTestSession.efd_enabled() and InternalTestSession.efd_has_failed_tests():
session.exitstatus = pytest.ExitCode.TESTS_FAILED
if InternalTestSession.atr_has_failed_tests() and InternalTestSession.atr_has_failed_tests():
if InternalTestSession.atr_is_enabled() and InternalTestSession.atr_has_failed_tests():
session.exitstatus = pytest.ExitCode.TESTS_FAILED

invoked_by_coverage_run_status = _is_coverage_invoked_by_coverage_run()
Expand Down Expand Up @@ -615,7 +634,7 @@ def pytest_report_teststatus(
return

if _pytest_version_supports_atr() and InternalTestSession.atr_is_enabled():
test_status = atr_get_teststatus(report)
test_status = atr_get_teststatus(report) or quarantine_atr_get_teststatus(report)
if test_status is not None:
return test_status

Expand All @@ -624,6 +643,16 @@ def pytest_report_teststatus(
if test_status is not None:
return test_status

user_properties = getattr(report, "user_properties", [])
is_quarantined = (USER_PROPERTY_QUARANTINED, True) in user_properties
if is_quarantined:
if report.when == "teardown":
return (OUTCOME_QUARANTINED, "q", ("QUARANTINED", {"blue": True}))
else:
# Don't show anything for setup and call of quarantined tests, regardless of
# whether there were errors or not.
return ("", "", "")


@pytest.hookimpl(trylast=True)
def pytest_ddtrace_get_item_module_name(item):
Expand Down
4 changes: 3 additions & 1 deletion ddtrace/contrib/pytest/_retry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def _get_outcome_from_retry(
# _initrequest() needs to be called first because the test has already executed once
item._initrequest()

# Reset output capture across retries.
item._report_sections = []

# Setup
setup_call, setup_report = _retry_run_when(item, "setup", outcomes)
if setup_report.outcome == outcomes.FAILED:
Expand Down Expand Up @@ -80,7 +83,6 @@ def _get_outcome_from_retry(
_outcome_status = TestStatus.SKIP
elif call_report.outcome == outcomes.PASSED:
_outcome_status = TestStatus.PASS

# Teardown does not happen if setup skipped
if not setup_report.skipped:
teardown_call, teardown_report = _retry_run_when(item, "teardown", outcomes)
Expand Down
20 changes: 19 additions & 1 deletion ddtrace/internal/ci_visibility/_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from http.client import RemoteDisconnected
import json
from json import JSONDecodeError
import os
import socket
import typing as t
from uuid import uuid4
Expand Down Expand Up @@ -38,6 +39,7 @@
from ddtrace.internal.logger import get_logger
from ddtrace.internal.test_visibility._internal_item_ids import InternalTestId
from ddtrace.internal.test_visibility.coverage_lines import CoverageLines
from ddtrace.internal.utils.formats import asbool
from ddtrace.internal.utils.http import ConnectionType
from ddtrace.internal.utils.http import Response
from ddtrace.internal.utils.http import get_connection
Expand Down Expand Up @@ -87,6 +89,11 @@ class EarlyFlakeDetectionSettings:
faulty_session_threshold: int = 30


@dataclasses.dataclass(frozen=True)
class QuarantineSettings:
enabled: bool = False


@dataclasses.dataclass(frozen=True)
class TestVisibilityAPISettings:
__test__ = False
Expand All @@ -96,6 +103,7 @@ class TestVisibilityAPISettings:
itr_enabled: bool = False
flaky_test_retries_enabled: bool = False
early_flake_detection: EarlyFlakeDetectionSettings = dataclasses.field(default_factory=EarlyFlakeDetectionSettings)
quarantine: QuarantineSettings = dataclasses.field(default_factory=QuarantineSettings)


@dataclasses.dataclass(frozen=True)
Expand Down Expand Up @@ -359,7 +367,9 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
skipping_enabled = attributes["tests_skipping"]
require_git = attributes["require_git"]
itr_enabled = attributes["itr_enabled"]
flaky_test_retries_enabled = attributes["flaky_test_retries_enabled"]
flaky_test_retries_enabled = attributes["flaky_test_retries_enabled"] or asbool(
os.getenv("_DD_TEST_FORCE_ENABLE_ATR")
)

if attributes["early_flake_detection"]["enabled"]:
early_flake_detection = EarlyFlakeDetectionSettings(
Expand All @@ -372,6 +382,12 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
)
else:
early_flake_detection = EarlyFlakeDetectionSettings()

quarantine = QuarantineSettings(
enabled=attributes.get("quarantine", {}).get("enabled", False)
or asbool(os.getenv("_DD_TEST_FORCE_ENABLE_QUARANTINE"))
)

except KeyError:
record_api_request_error(metric_names.error, ERROR_TYPES.UNKNOWN)
raise
Expand All @@ -383,6 +399,7 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
itr_enabled=itr_enabled,
flaky_test_retries_enabled=flaky_test_retries_enabled,
early_flake_detection=early_flake_detection,
quarantine=quarantine,
)

record_settings_response(
Expand All @@ -392,6 +409,7 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
itr_enabled=api_settings.itr_enabled,
flaky_test_retries_enabled=api_settings.flaky_test_retries_enabled,
early_flake_detection_enabled=api_settings.early_flake_detection.enabled,
quarantine_enabled=api_settings.quarantine.enabled,
)

return api_settings
Expand Down
Loading

0 comments on commit 3f64666

Please sign in to comment.