From 5ef63ce006b5ccfa7c00abafc93940a4680b412d Mon Sep 17 00:00:00 2001 From: Rachel Yang Date: Wed, 27 Nov 2024 13:01:45 -0800 Subject: [PATCH] chore: add support for max items in baggage (#11421) Fixing baggage max items implementation based off the change in the RFC. "If baggage data exceeds one or both of these limits, APM SDKs should drop baggage name/value pairs until both conditions are met. For example, if baggage contains 70 name/value pairs, the SDK should only add 64 of them to the baggage header and drop the other 6. The W3C leaves the process of selecting which pairs to keep or drop up to implementers. The simplest algorithm in this example is to keep the first 64 pairs and drop the last 6." Also updating the test that checks max baggage items. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) (cherry picked from commit 8eb0b443203ea470fb1e482497918bb578c059e1) --- ddtrace/propagation/http.py | 31 ++++++++++++---------- tests/tracer/test_propagation.py | 45 ++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/ddtrace/propagation/http.py b/ddtrace/propagation/http.py index 343f653f548..a1664664ace 100644 --- a/ddtrace/propagation/http.py +++ b/ddtrace/propagation/http.py @@ -1,3 +1,4 @@ +import itertools import re import sys from typing import Any # noqa:F401 @@ -912,21 +913,23 @@ def _inject(span_context: Context, headers: Dict[str, str]) -> None: if not baggage_items: return - if len(baggage_items) > DD_TRACE_BAGGAGE_MAX_ITEMS: - log.warning("Baggage item limit exceeded") - return - try: - header_value = ",".join( - f"{_BaggageHeader._encode_key(key)}={_BaggageHeader._encode_value(value)}" - for key, value in baggage_items - ) - - buf = bytes(header_value, "utf-8") - if len(buf) > DD_TRACE_BAGGAGE_MAX_BYTES: - log.warning("Baggage header size exceeded") - return - + if len(baggage_items) > DD_TRACE_BAGGAGE_MAX_ITEMS: + log.warning("Baggage item limit exceeded, dropping excess items") + baggage_items = itertools.islice(baggage_items, DD_TRACE_BAGGAGE_MAX_ITEMS) # type: ignore + + encoded_items: List[str] = [] + total_size = 0 + for key, value in baggage_items: + item = f"{_BaggageHeader._encode_key(key)}={_BaggageHeader._encode_value(value)}" + item_size = len(item.encode("utf-8")) + (1 if encoded_items else 0) # +1 for comma if not first item + if total_size + item_size > DD_TRACE_BAGGAGE_MAX_BYTES: + log.warning("Baggage header size exceeded, dropping excess items") + break # stop adding items when size limit is reached + encoded_items.append(item) + total_size += item_size + + header_value = ",".join(encoded_items) headers[_HTTP_HEADER_BAGGAGE] = header_value except Exception: diff --git a/tests/tracer/test_propagation.py b/tests/tracer/test_propagation.py index e4bf9be6ef3..64ce8232524 100644 --- a/tests/tracer/test_propagation.py +++ b/tests/tracer/test_propagation.py @@ -38,6 +38,7 @@ from ddtrace.propagation.http import HTTP_HEADER_SAMPLING_PRIORITY from ddtrace.propagation.http import HTTP_HEADER_TRACE_ID from ddtrace.propagation.http import HTTPPropagator +from ddtrace.propagation.http import _BaggageHeader from ddtrace.propagation.http import _TraceContext from tests.contrib.fastapi.conftest import client as fastapi_client # noqa:F401 from tests.contrib.fastapi.conftest import fastapi_application # noqa:F401 @@ -3134,16 +3135,15 @@ def test_llmobs_parent_id_not_injected_by_default(): ], ) def test_baggageheader_inject(span_context, expected_headers): - from ddtrace.propagation.http import _BaggageHeader - headers = {} _BaggageHeader._inject(span_context, headers) assert headers == expected_headers def test_baggageheader_maxitems_inject(): + import urllib.parse + from ddtrace.internal.constants import DD_TRACE_BAGGAGE_MAX_ITEMS - from ddtrace.propagation.http import _BaggageHeader headers = {} baggage_items = {} @@ -3151,18 +3151,45 @@ def test_baggageheader_maxitems_inject(): baggage_items[f"key{i}"] = f"val{i}" span_context = Context(baggage=baggage_items) _BaggageHeader._inject(span_context, headers) - assert "baggage" not in headers + assert "baggage" in headers + header_value = headers["baggage"] + items = header_value.split(",") + assert len(items) == DD_TRACE_BAGGAGE_MAX_ITEMS + + expected_keys = [f"key{i}" for i in range(DD_TRACE_BAGGAGE_MAX_ITEMS)] + for item in items: + key, value = item.split("=", 1) + key = urllib.parse.unquote(key) + assert key in expected_keys def test_baggageheader_maxbytes_inject(): from ddtrace.internal.constants import DD_TRACE_BAGGAGE_MAX_BYTES - from ddtrace.propagation.http import _BaggageHeader headers = {} - baggage_items = {"foo": ("a" * DD_TRACE_BAGGAGE_MAX_BYTES)} + # baggage item that exceeds the maximum byte size + baggage_items = {"foo": "a" * (DD_TRACE_BAGGAGE_MAX_BYTES + 1)} + span_context = Context(baggage=baggage_items) + _BaggageHeader._inject(span_context, headers) + # since the baggage item exceeds the max bytes, no header should be injected + header_value = headers["baggage"] + assert header_value == "" + + # multiple baggage items to test dropping items when the total size exceeds the limit + headers = {} + baggage_items = { + "key1": "a" * ((DD_TRACE_BAGGAGE_MAX_BYTES // 3)), + "key2": "b" * ((DD_TRACE_BAGGAGE_MAX_BYTES // 3)), + "key3": "c" * ((DD_TRACE_BAGGAGE_MAX_BYTES // 3)), + "key4": "d", + } span_context = Context(baggage=baggage_items) _BaggageHeader._inject(span_context, headers) - assert "baggage" not in headers + header_value = headers["baggage"] + header_size = len(header_value.encode("utf-8")) + assert header_size <= DD_TRACE_BAGGAGE_MAX_BYTES + assert "key4" not in header_value + assert "key2" in header_value @pytest.mark.parametrize( @@ -3188,8 +3215,6 @@ def test_baggageheader_maxbytes_inject(): ], ) def test_baggageheader_extract(headers, expected_baggage): - from ddtrace.propagation.http import _BaggageHeader - context = _BaggageHeader._extract(headers) assert context._baggage == expected_baggage @@ -3210,8 +3235,6 @@ def test_baggageheader_extract(headers, expected_baggage): ], ) def test_baggage_malformedheader_extract(headers, expected_baggage): - from ddtrace.propagation.http import _BaggageHeader - context = _BaggageHeader._extract(headers) assert context._baggage == expected_baggage