Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(ci_visibility): add quarantine support to pytest #11615

Merged
merged 35 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
38120c2
add settings and break everything
vitor-de-araujo Nov 25, 2024
a600326
fix
vitor-de-araujo Nov 25, 2024
de0d22b
magit test
vitor-de-araujo Nov 25, 2024
70fc528
hacks aplenty
vitor-de-araujo Nov 29, 2024
b2ff59b
fix duplicate stdout capture
vitor-de-araujo Nov 29, 2024
a045a09
what am i doing
vitor-de-araujo Dec 4, 2024
2716b20
a
vitor-de-araujo Dec 4, 2024
cfd1a1d
dd_quarantine_shenanigans
vitor-de-araujo Dec 4, 2024
df0ef19
tag has_failed_all_retries, remove quarantined items from stats
vitor-de-araujo Dec 5, 2024
5696fc3
report correct number of quarantined tests with ATR
vitor-de-araujo Dec 5, 2024
be651e1
breadcrumb
vitor-de-araujo Dec 6, 2024
6a3072f
use user_properties to store quarantined status for report
vitor-de-araujo Dec 6, 2024
23f9f5e
small cleanup
vitor-de-araujo Dec 6, 2024
5b00559
allow setting quarantine tag explicitly
vitor-de-araujo Dec 6, 2024
bfe5009
add ability to force quarantine on while we don't have the API
vitor-de-araujo Dec 6, 2024
870c54d
allow force enabling ATR
vitor-de-araujo Dec 6, 2024
84ab4ec
telemetry
vitor-de-araujo Dec 6, 2024
f3b6b71
linting and things
vitor-de-araujo Dec 10, 2024
92c1ea7
exposing the lies
vitor-de-araujo Dec 10, 2024
432fc13
fix fix fix
vitor-de-araujo Dec 10, 2024
7e4a9d5
stuff
vitor-de-araujo Dec 12, 2024
c3eb7c7
fix exit status
vitor-de-araujo Dec 13, 2024
32a2ab0
alternative approach
vitor-de-araujo Dec 13, 2024
62f4932
Some semblance of tests
vitor-de-araujo Dec 16, 2024
29ca2f7
test spans separately
vitor-de-araujo Dec 16, 2024
681676e
lint
vitor-de-araujo Dec 16, 2024
3307284
constants!
vitor-de-araujo Dec 18, 2024
0239e0c
Remove things for the backend API
vitor-de-araujo Dec 18, 2024
56dc826
Merge branch 'main' of github.com:DataDog/dd-trace-py into vitor-de-a…
vitor-de-araujo Dec 18, 2024
34cd93d
fixes?
vitor-de-araujo Dec 18, 2024
1ac8026
Remove duplicate CIVisibility.disable() from subclasses
vitor-de-araujo Dec 19, 2024
409634b
Merge branch 'main' of github.com:DataDog/dd-trace-py into vitor-de-a…
vitor-de-araujo Dec 19, 2024
7b31dde
Merge branch 'main' of github.com:DataDog/dd-trace-py into vitor-de-a…
vitor-de-araujo Dec 19, 2024
489e6e8
Use PytestTestCaseBase in test_encoder.py
vitor-de-araujo Dec 20, 2024
e419523
Merge branch 'main' of github.com:DataDog/dd-trace-py into vitor-de-a…
vitor-de-araujo Dec 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 88 additions & 29 deletions ddtrace/contrib/pytest/_atr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,41 +29,64 @@ class _ATR_RETRY_OUTCOMES:
ATR_FINAL_FAILED = "dd_atr_final_failed"


class _QUARANTINE_ATR_RETRY_OUTCOMES(_ATR_RETRY_OUTCOMES):
ATR_ATTEMPT_PASSED = "dd_quarantine_atr_attempt_passed"
ATR_ATTEMPT_FAILED = "dd_quarantine_atr_attempt_failed"
ATR_ATTEMPT_SKIPPED = "dd_quarantine_atr_attempt_skipped"
ATR_FINAL_PASSED = "dd_quarantine_atr_final_passed"
ATR_FINAL_FAILED = "dd_quarantine_atr_final_failed"


_FINAL_OUTCOMES: t.Dict[TestStatus, str] = {
TestStatus.PASS: _ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
TestStatus.FAIL: _ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
}


_QUARANTINE_FINAL_OUTCOMES: t.Dict[TestStatus, str] = {
TestStatus.PASS: _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
TestStatus.FAIL: _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
}


def atr_handle_retries(
test_id: InternalTestId,
item: pytest.Item,
when: str,
original_result: pytest_TestReport,
test_outcome: _TestOutcome,
is_quarantined: bool = False,
):
if is_quarantined:
retry_outcomes = _QUARANTINE_ATR_RETRY_OUTCOMES
final_outcomes = _QUARANTINE_FINAL_OUTCOMES
else:
retry_outcomes = _ATR_RETRY_OUTCOMES
final_outcomes = _FINAL_OUTCOMES

outcomes = RetryOutcomes(
PASSED=retry_outcomes.ATR_ATTEMPT_PASSED,
FAILED=retry_outcomes.ATR_ATTEMPT_FAILED,
SKIPPED=retry_outcomes.ATR_ATTEMPT_SKIPPED,
XFAIL=retry_outcomes.ATR_ATTEMPT_PASSED,
XPASS=retry_outcomes.ATR_ATTEMPT_FAILED,
)

# Overwrite the original result to avoid double-counting when displaying totals in final summary
if when == "call":
if test_outcome.status == TestStatus.FAIL:
original_result.outcome = _ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED
return
if InternalTest.get_tag(test_id, "_dd.ci.atr_setup_failed"):
log.debug("Test item %s failed during setup, will not be retried for Early Flake Detection")
return
if InternalTest.get_tag(test_id, "_dd.ci.atr_teardown_failed"):
# NOTE: tests that passed their call but failed during teardown are not retried
log.debug("Test item %s failed during teardown, will not be retried for Early Flake Detection")
romainkomorn-exdatadog marked this conversation as resolved.
Show resolved Hide resolved
original_result.outcome = outcomes.FAILED
return

atr_outcome = _atr_do_retries(item)
atr_outcome = _atr_do_retries(item, outcomes)

final_report = pytest_TestReport(
nodeid=item.nodeid,
location=item.location,
keywords=item.keywords,
when="call",
longrepr=None,
outcome=_FINAL_OUTCOMES[atr_outcome],
outcome=final_outcomes[atr_outcome],
)
item.ihook.pytest_runtest_logreport(report=final_report)

Expand All @@ -72,15 +95,7 @@ def atr_get_failed_reports(terminalreporter: _pytest.terminal.TerminalReporter)
return terminalreporter.getreports(_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED)


def _atr_do_retries(item: pytest.Item) -> TestStatus:
outcomes = RetryOutcomes(
PASSED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
FAILED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
SKIPPED=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED,
XFAIL=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
XPASS=_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
)

def _atr_do_retries(item: pytest.Item, outcomes: RetryOutcomes) -> TestStatus:
test_id = _get_test_id_from_item(item)

while InternalTest.atr_should_retry(test_id):
Expand Down Expand Up @@ -160,21 +175,21 @@ def atr_pytest_terminal_summary_post_yield(terminalreporter: _pytest.terminal.Te

_atr_write_report_for_status(
terminalreporter,
_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
"failed",
PYTEST_STATUS.FAILED,
raw_summary_strings,
markedup_summary_strings,
status_key=_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
status_text="failed",
report_outcome=PYTEST_STATUS.FAILED,
raw_strings=raw_summary_strings,
markedup_strings=markedup_summary_strings,
color="red",
)

_atr_write_report_for_status(
terminalreporter,
_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
"passed",
PYTEST_STATUS.PASSED,
raw_summary_strings,
markedup_summary_strings,
status_key=_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
status_text="passed",
report_outcome=PYTEST_STATUS.PASSED,
raw_strings=raw_summary_strings,
markedup_strings=markedup_summary_strings,
color="green",
)

Expand Down Expand Up @@ -268,3 +283,47 @@ def atr_get_teststatus(report: pytest_TestReport) -> _pytest_report_teststatus_r
if report.outcome == _ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED:
return (_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED, "F", ("ATR FINAL STATUS: FAILED", {"red": True}))
return None


def quarantine_atr_get_teststatus(report: pytest_TestReport) -> _pytest_report_teststatus_return_type:
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED,
"q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}PASSED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED,
"Q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}FAILED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED,
"q",
(f"QUARANTINED RETRY {_get_retry_attempt_string(report.nodeid)}SKIPPED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED,
".",
("QUARANTINED FINAL STATUS: PASSED", {"blue": True}),
)
if report.outcome == _QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED:
return (
_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED,
"F",
("QUARANTINED FINAL STATUS: FAILED", {"blue": True}),
)
return None


def quarantine_pytest_terminal_summary_post_yield(terminalreporter: _pytest.terminal.TerminalReporter):
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_PASSED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_FAILED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_ATTEMPT_SKIPPED, None)
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_PASSED, [])
terminalreporter.stats.pop(_QUARANTINE_ATR_RETRY_OUTCOMES.ATR_FINAL_FAILED, [])

# TODO: report list of fully failed quarantined tests, possibly inside the ATR report.
35 changes: 32 additions & 3 deletions ddtrace/contrib/pytest/_plugin_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@
from ddtrace.contrib.pytest._atr_utils import atr_get_teststatus
from ddtrace.contrib.pytest._atr_utils import atr_handle_retries
from ddtrace.contrib.pytest._atr_utils import atr_pytest_terminal_summary_post_yield
from ddtrace.contrib.pytest._atr_utils import quarantine_atr_get_teststatus
from ddtrace.contrib.pytest._atr_utils import quarantine_pytest_terminal_summary_post_yield

log = get_logger(__name__)


_NODEID_REGEX = re.compile("^((?P<module>.*)/(?P<suite>[^/]*?))::(?P<name>.*?)$")
USER_PROPERTY_QUARANTINED = "dd_quarantined"
OUTCOME_QUARANTINED = "quarantined"


def _handle_itr_should_skip(item, test_id) -> bool:
Expand Down Expand Up @@ -298,6 +302,11 @@ def _pytest_runtest_protocol_pre_yield(item) -> t.Optional[ModuleCodeCollector.C

collect_test_coverage = InternalTestSession.should_collect_coverage() and not item_will_skip

is_quarantined = InternalTest.is_quarantined_test(test_id)
if is_quarantined:
# We add this information to user_properties to have it available in pytest_runtest_makereport().
item.user_properties += [(USER_PROPERTY_QUARANTINED, True)]

if collect_test_coverage:
return _start_collecting_coverage()

Expand Down Expand Up @@ -428,6 +437,8 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome

test_id = _get_test_id_from_item(item)

is_quarantined = InternalTest.is_quarantined_test(test_id)

test_outcome = _process_result(item, call, original_result)

# A None value for test_outcome.status implies the test has not finished yet
Expand All @@ -439,6 +450,11 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
if not InternalTest.is_finished(test_id):
InternalTest.finish(test_id, test_outcome.status, test_outcome.skip_reason, test_outcome.exc_info)

if original_result.failed and is_quarantined:
# Ensure test doesn't count as failed for pytest's exit status logic
# (see <https://github.com/pytest-dev/pytest/blob/8.3.x/src/_pytest/main.py#L654>).
original_result.outcome = OUTCOME_QUARANTINED

# ATR and EFD retry tests only if their teardown succeeded to ensure the best chance the retry will succeed
# NOTE: this mutates the original result's outcome
if InternalTest.stash_get(test_id, "setup_failed") or InternalTest.stash_get(test_id, "teardown_failed"):
Expand All @@ -447,7 +463,7 @@ def _pytest_runtest_makereport(item: pytest.Item, call: pytest_CallInfo, outcome
if InternalTestSession.efd_enabled() and InternalTest.efd_should_retry(test_id):
return efd_handle_retries(test_id, item, call.when, original_result, test_outcome)
if InternalTestSession.atr_is_enabled() and InternalTest.atr_should_retry(test_id):
return atr_handle_retries(test_id, item, call.when, original_result, test_outcome)
return atr_handle_retries(test_id, item, call.when, original_result, test_outcome, is_quarantined)


@pytest.hookimpl(hookwrapper=True)
Expand Down Expand Up @@ -505,6 +521,9 @@ def _pytest_terminal_summary_post_yield(terminalreporter, failed_reports_initial

if _pytest_version_supports_atr() and InternalTestSession.atr_is_enabled():
atr_pytest_terminal_summary_post_yield(terminalreporter)

quarantine_pytest_terminal_summary_post_yield(terminalreporter)

return


Expand Down Expand Up @@ -537,7 +556,7 @@ def _pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:

if InternalTestSession.efd_enabled() and InternalTestSession.efd_has_failed_tests():
session.exitstatus = pytest.ExitCode.TESTS_FAILED
if InternalTestSession.atr_has_failed_tests() and InternalTestSession.atr_has_failed_tests():
if InternalTestSession.atr_is_enabled() and InternalTestSession.atr_has_failed_tests():
session.exitstatus = pytest.ExitCode.TESTS_FAILED

invoked_by_coverage_run_status = _is_coverage_invoked_by_coverage_run()
Expand Down Expand Up @@ -575,7 +594,7 @@ def pytest_report_teststatus(
return

if _pytest_version_supports_atr() and InternalTestSession.atr_is_enabled():
test_status = atr_get_teststatus(report)
test_status = atr_get_teststatus(report) or quarantine_atr_get_teststatus(report)
if test_status is not None:
return test_status

Expand All @@ -584,6 +603,16 @@ def pytest_report_teststatus(
if test_status is not None:
return test_status

user_properties = getattr(report, "user_properties", [])
is_quarantined = (USER_PROPERTY_QUARANTINED, True) in user_properties
if is_quarantined:
if report.when == "teardown":
return (OUTCOME_QUARANTINED, "q", ("QUARANTINED", {"blue": True}))
else:
# Don't show anything for setup and call of quarantined tests, regardless of
# whether there were errors or not.
return ("", "", "")
vitor-de-araujo marked this conversation as resolved.
Show resolved Hide resolved


@pytest.hookimpl(trylast=True)
def pytest_ddtrace_get_item_module_name(item):
Expand Down
4 changes: 3 additions & 1 deletion ddtrace/contrib/pytest/_retry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def _get_outcome_from_retry(
# _initrequest() needs to be called first because the test has already executed once
item._initrequest()

# Reset output capture across retries.
item._report_sections = []

# Setup
setup_call, setup_report = _retry_run_when(item, "setup", outcomes)
if setup_report.outcome == outcomes.FAILED:
Expand All @@ -75,7 +78,6 @@ def _get_outcome_from_retry(
_outcome_status = TestStatus.SKIP
elif call_report.outcome == outcomes.PASSED:
_outcome_status = TestStatus.PASS

# Teardown does not happen if setup skipped
if not setup_report.skipped:
teardown_call, teardown_report = _retry_run_when(item, "teardown", outcomes)
Expand Down
20 changes: 19 additions & 1 deletion ddtrace/internal/ci_visibility/_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from http.client import RemoteDisconnected
import json
from json import JSONDecodeError
import os
import socket
import typing as t
from uuid import uuid4
Expand Down Expand Up @@ -38,6 +39,7 @@
from ddtrace.internal.logger import get_logger
from ddtrace.internal.test_visibility._internal_item_ids import InternalTestId
from ddtrace.internal.test_visibility.coverage_lines import CoverageLines
from ddtrace.internal.utils.formats import asbool
from ddtrace.internal.utils.http import ConnectionType
from ddtrace.internal.utils.http import Response
from ddtrace.internal.utils.http import get_connection
Expand Down Expand Up @@ -87,6 +89,11 @@ class EarlyFlakeDetectionSettings:
faulty_session_threshold: int = 30


@dataclasses.dataclass(frozen=True)
class QuarantineSettings:
enabled: bool = False


@dataclasses.dataclass(frozen=True)
class TestVisibilityAPISettings:
__test__ = False
Expand All @@ -96,6 +103,7 @@ class TestVisibilityAPISettings:
itr_enabled: bool = False
flaky_test_retries_enabled: bool = False
early_flake_detection: EarlyFlakeDetectionSettings = dataclasses.field(default_factory=EarlyFlakeDetectionSettings)
quarantine: QuarantineSettings = dataclasses.field(default_factory=QuarantineSettings)


@dataclasses.dataclass(frozen=True)
Expand Down Expand Up @@ -359,7 +367,9 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
skipping_enabled = attributes["tests_skipping"]
require_git = attributes["require_git"]
itr_enabled = attributes["itr_enabled"]
flaky_test_retries_enabled = attributes["flaky_test_retries_enabled"]
flaky_test_retries_enabled = attributes["flaky_test_retries_enabled"] or asbool(
os.getenv("_DD_TEST_FORCE_ENABLE_ATR")
)

if attributes["early_flake_detection"]["enabled"]:
early_flake_detection = EarlyFlakeDetectionSettings(
Expand All @@ -372,6 +382,12 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
)
else:
early_flake_detection = EarlyFlakeDetectionSettings()

quarantine = QuarantineSettings(
enabled=attributes.get("quarantine", {}).get("enabled", False)
or asbool(os.getenv("_DD_TEST_FORCE_ENABLE_QUARANTINE"))
)

except KeyError:
record_api_request_error(metric_names.error, ERROR_TYPES.UNKNOWN)
raise
Expand All @@ -383,6 +399,7 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
itr_enabled=itr_enabled,
flaky_test_retries_enabled=flaky_test_retries_enabled,
early_flake_detection=early_flake_detection,
quarantine=quarantine,
)

record_settings_response(
Expand All @@ -391,6 +408,7 @@ def fetch_settings(self) -> TestVisibilityAPISettings:
api_settings.require_git,
api_settings.itr_enabled,
api_settings.early_flake_detection.enabled,
api_settings.quarantine.enabled,
)

return api_settings
Expand Down
Loading
Loading