From 95743ab2094e3c5c225398e9b112eed87a4a6ec8 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Sat, 12 Oct 2024 23:11:32 -0400 Subject: [PATCH] DEBUG-2334 duplicate mutable values when serializing for dynamic instrumentation When serializing parameter values at method entry, the resulting snapshot should not change if the parameter is mutated during method execution. This currently only applies to string values as other values used as serializer output are not mutable. This commit duplicates the string values which achieves immutability with no additional API complexity but at the cost of duplicating all strings, even if they are not entry parameters and thus can be stored without duplication. --- lib/datadog/di/serializer.rb | 19 ++++++++++++++- spec/datadog/di/serializer_spec.rb | 39 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/lib/datadog/di/serializer.rb b/lib/datadog/di/serializer.rb index 7958302dc86..7bfd85393d2 100644 --- a/lib/datadog/di/serializer.rb +++ b/lib/datadog/di/serializer.rb @@ -26,6 +26,16 @@ module DI # All serialization methods take the names of the variables being # serialized in order to be able to redact values. # + # The result of serialization should not reference parameter values when + # the values are mutable (currently, this only applies to string values). + # Serializer will duplicate such mutable values, so that if method + # arguments are captured at entry and then modified during method execution, + # the serialized values from entry are correctly preserved. + # Alternatively, we could pass a parameter to the serialization methods + # which would control whether values are duplicated. This may be more + # efficient but there would be additional overhead from passing this + # parameter all the time and the API would get more complex. + # # @api private class Serializer def initialize(settings, redactor) @@ -92,7 +102,14 @@ def serialize_value(value, name: nil, depth: settings.dynamic_instrumentation.ma when Integer, Float, TrueClass, FalseClass serialized.update(value: value.to_s) when String, Symbol - value = value.to_s + value = case value + when String + # This is the only place where we duplicate the value, currently. + # All other values are immutable primitives (e.g. numbers). + value.dup + else + value.to_s + end max = settings.dynamic_instrumentation.max_capture_string_length if value.length > max serialized.update(truncated: true, size: value.length) diff --git a/spec/datadog/di/serializer_spec.rb b/spec/datadog/di/serializer_spec.rb index a19d02b79e7..f1d0a974f7d 100644 --- a/spec/datadog/di/serializer_spec.rb +++ b/spec/datadog/di/serializer_spec.rb @@ -320,5 +320,44 @@ def self.define_cases(cases) end end end + + context 'when positional arg is mutated' do + let(:args) do + ['hello', 'world'] + end + + let(:kwargs) { {} } + + it 'preserves original value' do + serialized + + args.first.gsub!('hello', 'bye') + + expect(serialized).to eq( + arg1: {type: 'String', value: 'hello'}, + arg2: {type: 'String', value: 'world'}, + ) + end + end + + context 'when keyword arg is mutated' do + let(:args) do + [] + end + + let(:kwargs) do + {foo: 'bar'} + end + + it 'preserves original value' do + serialized + + kwargs[:foo].gsub!('bar', 'bye') + + expect(serialized).to eq( + foo: {type: 'String', value: 'bar'}, + ) + end + end end end