diff --git a/pull-scudo.sh b/pull-scudo.sh index 90b2c32..5a1eb96 100755 --- a/pull-scudo.sh +++ b/pull-scudo.sh @@ -13,7 +13,7 @@ # # Usage: pull-scudo.sh -version="llvmorg-13.0.0" # Keep me up to date! +version="llvmorg-18.1.8" # Keep me up to date! tmp_repo=$(mktemp -d) tmp_license=$(mktemp) diff --git a/third_party/llvm-scudo-standalone/CMakeLists.txt b/third_party/llvm-scudo-standalone/CMakeLists.txt index d6ffa44..6009200 100644 --- a/third_party/llvm-scudo-standalone/CMakeLists.txt +++ b/third_party/llvm-scudo-standalone/CMakeLists.txt @@ -7,11 +7,13 @@ set(SCUDO_CFLAGS) list(APPEND SCUDO_CFLAGS -Werror=conversion -Wall + -Wextra + -pedantic -g -nostdinc++) # Remove -stdlib= which is unused when passing -nostdinc++. -string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) +string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") append_list_if(COMPILER_RT_HAS_FVISIBILITY_HIDDEN_FLAG -fvisibility=hidden SCUDO_CFLAGS) @@ -23,11 +25,14 @@ append_list_if(COMPILER_RT_HAS_WNO_PEDANTIC -Wno-pedantic SCUDO_CFLAGS) append_list_if(COMPILER_RT_HAS_FNO_LTO_FLAG -fno-lto SCUDO_CFLAGS) if(COMPILER_RT_DEBUG) - list(APPEND SCUDO_CFLAGS -O0 -DSCUDO_DEBUG=1) + list(APPEND SCUDO_CFLAGS -O0 -DSCUDO_DEBUG=1 -DSCUDO_ENABLE_HOOKS=1) else() list(APPEND SCUDO_CFLAGS -O3) endif() +append_list_if(COMPILER_RT_HAS_WTHREAD_SAFETY_FLAG -Werror=thread-safety + SCUDO_CFLAGS) + set(SCUDO_LINK_FLAGS) list(APPEND SCUDO_LINK_FLAGS -Wl,-z,defs,-z,now,-z,relro) @@ -36,6 +41,11 @@ list(APPEND SCUDO_LINK_FLAGS -ffunction-sections -fdata-sections -Wl,--gc-sectio # We don't use the C++ standard library, so avoid including it by mistake. append_list_if(COMPILER_RT_HAS_NOSTDLIBXX_FLAG -nostdlib++ SCUDO_LINK_FLAGS) +append_list_if(CXX_SUPPORTS_UNWINDLIB_NONE_FLAG --unwindlib=none SCUDO_LINK_FLAGS) + +if(COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH) + list(APPEND SCUDO_CFLAGS "--sysroot=${COMPILER_RT_SCUDO_STANDALONE_SYSROOT_PATH}") +endif() if(ANDROID) list(APPEND SCUDO_CFLAGS -fno-emulated-tls) @@ -46,11 +56,15 @@ if(ANDROID) endif() set(SCUDO_HEADERS + allocator_common.h allocator_config.h atomic_helpers.h bytemap.h checksum.h chunk.h + condition_variable.h + condition_variable_base.h + condition_variable_linux.h combined.h common.h flags_parser.h @@ -61,6 +75,10 @@ set(SCUDO_HEADERS list.h local_cache.h memtag.h + mem_map.h + mem_map_base.h + mem_map_fuchsia.h + mem_map_linux.h mutex.h options.h platform.h @@ -69,11 +87,13 @@ set(SCUDO_HEADERS quarantine.h release.h report.h + report_linux.h secondary.h size_class_map.h stack_depot.h stats.h string_utils.h + timing.h tsd_exclusive.h tsd_shared.h tsd.h @@ -87,18 +107,34 @@ set(SCUDO_HEADERS set(SCUDO_SOURCES checksum.cpp common.cpp + condition_variable_linux.cpp crc32_hw.cpp flags_parser.cpp flags.cpp fuchsia.cpp linux.cpp + mem_map.cpp + mem_map_fuchsia.cpp + mem_map_linux.cpp release.cpp report.cpp + report_linux.cpp string_utils.cpp + timing.cpp ) -# Enable the SSE 4.2 instruction set for crc32_hw.cpp, if available. -if (COMPILER_RT_HAS_MSSE4_2_FLAG) +# Temporary hack until LLVM libc supports inttypes.h print format macros +# See: https://github.com/llvm/llvm-project/issues/63317#issuecomment-1591906241 +if(LLVM_LIBC_INCLUDE_SCUDO) + list(REMOVE_ITEM SCUDO_HEADERS timing.h) + list(REMOVE_ITEM SCUDO_SOURCES timing.cpp) +endif() + +# Enable the necessary instruction set for scudo_crc32.cpp, if available. +# Newer compiler versions use -mcrc32 rather than -msse4.2. 
+if (COMPILER_RT_HAS_MCRC32_FLAG) + set_source_files_properties(crc32_hw.cpp PROPERTIES COMPILE_FLAGS -mcrc32) +elseif (COMPILER_RT_HAS_MSSE4_2_FLAG) set_source_files_properties(crc32_hw.cpp PROPERTIES COMPILE_FLAGS -msse4.2) endif() @@ -117,8 +153,19 @@ set(SCUDO_SOURCES_CXX_WRAPPERS ) set(SCUDO_OBJECT_LIBS) +set(SCUDO_LINK_LIBS) if (COMPILER_RT_HAS_GWP_ASAN) + if(COMPILER_RT_USE_LLVM_UNWINDER) + list(APPEND SCUDO_LINK_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS} dl) + elseif (COMPILER_RT_HAS_GCC_S_LIB) + list(APPEND SCUDO_LINK_LIBS gcc_s) + elseif (COMPILER_RT_HAS_GCC_LIB) + list(APPEND SCUDO_LINK_LIBS gcc) + elseif (NOT COMPILER_RT_USE_BUILTINS_LIBRARY) + message(FATAL_ERROR "No suitable unwinder library") + endif() + add_dependencies(scudo_standalone gwp_asan) list(APPEND SCUDO_OBJECT_LIBS RTGwpAsan RTGwpAsanBacktraceLibc RTGwpAsanSegvHandler @@ -131,28 +178,41 @@ if (COMPILER_RT_HAS_GWP_ASAN) endif() -set(SCUDO_LINK_LIBS) +if(COMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC) + include_directories(${COMPILER_RT_BINARY_DIR}/../libc/include/) + + set(SCUDO_DEPS libc-headers) + + list(APPEND SCUDO_CFLAGS "-ffreestanding") +endif() append_list_if(COMPILER_RT_HAS_LIBPTHREAD -pthread SCUDO_LINK_FLAGS) append_list_if(FUCHSIA zircon SCUDO_LINK_LIBS) +if(COMPILER_RT_DEFAULT_TARGET_ARCH MATCHES "mips|mips64|mipsel|mips64el") + list(APPEND SCUDO_LINK_LIBS atomic) +endif() + if(COMPILER_RT_HAS_SCUDO_STANDALONE) add_compiler_rt_object_libraries(RTScudoStandalone ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} SOURCES ${SCUDO_SOURCES} ADDITIONAL_HEADERS ${SCUDO_HEADERS} - CFLAGS ${SCUDO_CFLAGS}) + CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS}) add_compiler_rt_object_libraries(RTScudoStandaloneCWrappers ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} SOURCES ${SCUDO_SOURCES_C_WRAPPERS} ADDITIONAL_HEADERS ${SCUDO_HEADERS} - CFLAGS ${SCUDO_CFLAGS}) + CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS}) add_compiler_rt_object_libraries(RTScudoStandaloneCxxWrappers ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} SOURCES ${SCUDO_SOURCES_CXX_WRAPPERS} ADDITIONAL_HEADERS ${SCUDO_HEADERS} - CFLAGS ${SCUDO_CFLAGS}) + CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS}) add_compiler_rt_runtime(clang_rt.scudo_standalone STATIC @@ -160,6 +220,7 @@ if(COMPILER_RT_HAS_SCUDO_STANDALONE) SOURCES ${SCUDO_SOURCES} ${SCUDO_SOURCES_C_WRAPPERS} ADDITIONAL_HEADERS ${SCUDO_HEADERS} CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS} OBJECT_LIBS ${SCUDO_OBJECT_LIBS} PARENT_TARGET scudo_standalone) add_compiler_rt_runtime(clang_rt.scudo_standalone_cxx @@ -168,18 +229,22 @@ if(COMPILER_RT_HAS_SCUDO_STANDALONE) SOURCES ${SCUDO_SOURCES_CXX_WRAPPERS} ADDITIONAL_HEADERS ${SCUDO_HEADERS} CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS} PARENT_TARGET scudo_standalone) - add_compiler_rt_runtime(clang_rt.scudo_standalone - SHARED - ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} - SOURCES ${SCUDO_SOURCES} ${SCUDO_SOURCES_C_WRAPPERS} ${SCUDO_SOURCES_CXX_WRAPPERS} - ADDITIONAL_HEADERS ${SCUDO_HEADERS} - CFLAGS ${SCUDO_CFLAGS} - OBJECT_LIBS ${SCUDO_OBJECT_LIBS} - LINK_FLAGS ${SCUDO_LINK_FLAGS} - LINK_LIBS ${SCUDO_LINK_LIBS} - PARENT_TARGET scudo_standalone) + if(COMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED) + add_compiler_rt_runtime(clang_rt.scudo_standalone + SHARED + ARCHS ${SCUDO_STANDALONE_SUPPORTED_ARCH} + SOURCES ${SCUDO_SOURCES} ${SCUDO_SOURCES_C_WRAPPERS} ${SCUDO_SOURCES_CXX_WRAPPERS} + ADDITIONAL_HEADERS ${SCUDO_HEADERS} + CFLAGS ${SCUDO_CFLAGS} + DEPS ${SCUDO_DEPS} + OBJECT_LIBS ${SCUDO_OBJECT_LIBS} + LINK_FLAGS ${SCUDO_LINK_FLAGS} + LINK_LIBS ${SCUDO_LINK_LIBS} + PARENT_TARGET 
scudo_standalone) + endif() add_subdirectory(benchmarks) if(COMPILER_RT_INCLUDE_TESTS) diff --git a/third_party/llvm-scudo-standalone/allocator_common.h b/third_party/llvm-scudo-standalone/allocator_common.h new file mode 100644 index 0000000..95f4776 --- /dev/null +++ b/third_party/llvm-scudo-standalone/allocator_common.h @@ -0,0 +1,85 @@ +//===-- allocator_common.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_ALLOCATOR_COMMON_H_ +#define SCUDO_ALLOCATOR_COMMON_H_ + +#include "common.h" +#include "list.h" + +namespace scudo { + +template struct TransferBatch { + typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; + typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; + + static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint; + void setFromArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, MaxNumCached); + Count = N; + memcpy(Batch, Array, sizeof(Batch[0]) * Count); + } + void appendFromArray(CompactPtrT *Array, u16 N) { + DCHECK_LE(N, MaxNumCached - Count); + memcpy(Batch + Count, Array, sizeof(Batch[0]) * N); + // u16 will be promoted to int by arithmetic type conversion. + Count = static_cast(Count + N); + } + void appendFromTransferBatch(TransferBatch *B, u16 N) { + DCHECK_LE(N, MaxNumCached - Count); + DCHECK_GE(B->Count, N); + // Append from the back of `B`. + memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N); + // u16 will be promoted to int by arithmetic type conversion. + Count = static_cast(Count + N); + B->Count = static_cast(B->Count - N); + } + void clear() { Count = 0; } + void add(CompactPtrT P) { + DCHECK_LT(Count, MaxNumCached); + Batch[Count++] = P; + } + void moveToArray(CompactPtrT *Array) { + memcpy(Array, Batch, sizeof(Batch[0]) * Count); + clear(); + } + u16 getCount() const { return Count; } + bool isEmpty() const { return Count == 0U; } + CompactPtrT get(u16 I) const { + DCHECK_LE(I, Count); + return Batch[I]; + } + TransferBatch *Next; + +private: + CompactPtrT Batch[MaxNumCached]; + u16 Count; +}; + +// A BatchGroup is used to collect blocks. Each group has a group id to +// identify the group kind of contained blocks. +template struct BatchGroup { + // `Next` is used by IntrusiveList. + BatchGroup *Next; + // The compact base address of each group + uptr CompactPtrGroupBase; + // Cache value of SizeClassAllocatorLocalCache::getMaxCached() + u16 MaxCachedPerBatch; + // Number of blocks pushed into this group. This is an increment-only + // counter. + uptr PushedBlocks; + // This is used to track how many bytes are not in-use since last time we + // tried to release pages. + uptr BytesInBGAtLastCheckpoint; + // Blocks are managed by TransferBatch in a list. 
+ SinglyLinkedList> Batches; +}; + +} // namespace scudo + +#endif // SCUDO_ALLOCATOR_COMMON_H_ diff --git a/third_party/llvm-scudo-standalone/allocator_config.h b/third_party/llvm-scudo-standalone/allocator_config.h index e6f46b5..3c6aa3a 100644 --- a/third_party/llvm-scudo-standalone/allocator_config.h +++ b/third_party/llvm-scudo-standalone/allocator_config.h @@ -11,6 +11,7 @@ #include "combined.h" #include "common.h" +#include "condition_variable.h" #include "flags.h" #include "primary32.h" #include "primary64.h" @@ -19,6 +20,22 @@ #include "tsd_exclusive.h" #include "tsd_shared.h" +// To import a custom configuration, define `SCUDO_USE_CUSTOM_CONFIG` and +// aliasing the `Config` like: +// +// namespace scudo { +// // The instance of Scudo will be initiated with `Config`. +// typedef CustomConfig Config; +// // Aliasing as default configuration to run the tests with this config. +// typedef CustomConfig DefaultConfig; +// } // namespace scudo +// +// Put them in the header `custom_scudo_config.h` then you will be using the +// custom configuration and able to run all the tests as well. +#ifdef SCUDO_USE_CUSTOM_CONFIG +#include "custom_scudo_config.h" +#endif + namespace scudo { // The combined allocator uses a structure as a template argument that @@ -26,169 +43,226 @@ namespace scudo { // allocator. // // struct ExampleConfig { -// // SizeClasMmap to use with the Primary. -// using SizeClassMap = DefaultSizeClassMap; // // Indicates possible support for Memory Tagging. // static const bool MaySupportMemoryTagging = false; -// // Defines the Primary allocator to use. -// typedef SizeClassAllocator64 Primary; -// // Log2 of the size of a size class region, as used by the Primary. -// static const uptr PrimaryRegionSizeLog = 30U; -// // Defines the type and scale of a compact pointer. A compact pointer can -// // be understood as the offset of a pointer within the region it belongs -// // to, in increments of a power-of-2 scale. -// // eg: Ptr = Base + (CompactPtr << Scale). -// typedef u32 PrimaryCompactPtrT; -// static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; -// // Indicates support for offsetting the start of a region by -// // a random number of pages. Only used with primary64. -// static const bool PrimaryEnableRandomOffset = true; -// // Call map for user memory with at least this size. Only used with -// // primary64. -// static const uptr PrimaryMapSizeIncrement = 1UL << 18; -// // Defines the minimal & maximal release interval that can be set. -// static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; -// static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; -// // Defines the type of cache used by the Secondary. Some additional -// // configuration entries can be necessary depending on the Cache. -// typedef MapAllocatorNoCache SecondaryCache; +// // // Thread-Specific Data Registry used, shared or exclusive. // template using TSDRegistryT = TSDRegistrySharedT; +// +// struct Primary { +// // SizeClassMap to use with the Primary. +// using SizeClassMap = DefaultSizeClassMap; +// +// // Log2 of the size of a size class region, as used by the Primary. +// static const uptr RegionSizeLog = 30U; +// +// // Log2 of the size of block group, as used by the Primary. Each group +// // contains a range of memory addresses, blocks in the range will belong +// // to the same group. In general, single region may have 1 or 2MB group +// // size. 
Multiple regions will have the group size equal to the region +// // size because the region size is usually smaller than 1 MB. +// // Smaller value gives fine-grained control of memory usage but the +// // trade-off is that it may take longer time of deallocation. +// static const uptr GroupSizeLog = 20U; +// +// // Defines the type and scale of a compact pointer. A compact pointer can +// // be understood as the offset of a pointer within the region it belongs +// // to, in increments of a power-of-2 scale. +// // eg: Ptr = Base + (CompactPtr << Scale). +// typedef u32 CompactPtrT; +// static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; +// +// // Indicates support for offsetting the start of a region by +// // a random number of pages. Only used with primary64. +// static const bool EnableRandomOffset = true; +// +// // Call map for user memory with at least this size. Only used with +// // primary64. +// static const uptr MapSizeIncrement = 1UL << 18; +// +// // Defines the minimal & maximal release interval that can be set. +// static const s32 MinReleaseToOsIntervalMs = INT32_MIN; +// static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; +// +// // Use condition variable to shorten the waiting time of refillment of +// // freelist. Note that this depends on the implementation of condition +// // variable on each platform and the performance may vary so that it +// // doesn't guarantee a performance benefit. +// // Note that both variables have to be defined to enable it. +// static const bool UseConditionVariable = true; +// using ConditionVariableT = ConditionVariableLinux; +// }; +// // Defines the type of Primary allocator to use. +// template using PrimaryT = SizeClassAllocator64; +// +// // Defines the type of cache used by the Secondary. Some additional +// // configuration entries can be necessary depending on the Cache. +// struct Secondary { +// struct Cache { +// static const u32 EntriesArraySize = 32U; +// static const u32 QuarantineSize = 0U; +// static const u32 DefaultMaxEntriesCount = 32U; +// static const uptr DefaultMaxEntrySize = 1UL << 19; +// static const s32 MinReleaseToOsIntervalMs = INT32_MIN; +// static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; +// }; +// // Defines the type of Secondary Cache to use. +// template using CacheT = MapAllocatorCache; +// }; +// // Defines the type of Secondary allocator to use. +// template using SecondaryT = MapAllocator; // }; -// Default configurations for various platforms. +#ifndef SCUDO_USE_CUSTOM_CONFIG +// Default configurations for various platforms. Note this is only enabled when +// there's no custom configuration in the build system. 
struct DefaultConfig { - using SizeClassMap = DefaultSizeClassMap; static const bool MaySupportMemoryTagging = true; + template using TSDRegistryT = TSDRegistryExT; // Exclusive + struct Primary { + using SizeClassMap = DefaultSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64 Primary; - static const uptr PrimaryRegionSizeLog = 32U; - typedef uptr PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + static const uptr RegionSizeLog = 32U; + static const uptr GroupSizeLog = 21U; + typedef uptr CompactPtrT; + static const uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; #else - typedef SizeClassAllocator32 Primary; - static const uptr PrimaryRegionSizeLog = 19U; - typedef uptr PrimaryCompactPtrT; + static const uptr RegionSizeLog = 19U; + static const uptr GroupSizeLog = 19U; + typedef uptr CompactPtrT; +#endif + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; +#if SCUDO_CAN_USE_PRIMARY64 + template using PrimaryT = SizeClassAllocator64; +#else + template using PrimaryT = SizeClassAllocator32; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - typedef MapAllocatorCache SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 32U; - static const u32 SecondaryCacheQuarantineSize = 0U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 19; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = INT32_MAX; + struct Secondary { + struct Cache { + static const u32 EntriesArraySize = 32U; + static const u32 QuarantineSize = 0U; + static const u32 DefaultMaxEntriesCount = 32U; + static const uptr DefaultMaxEntrySize = 1UL << 19; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template using CacheT = MapAllocatorCache; + }; - template using TSDRegistryT = TSDRegistryExT; // Exclusive + template using SecondaryT = MapAllocator; }; + +#endif // SCUDO_USE_CUSTOM_CONFIG + struct AndroidConfig { - using SizeClassMap = AndroidSizeClassMap; static const bool MaySupportMemoryTagging = true; + template + using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. 
+ struct Primary { + using SizeClassMap = AndroidSizeClassMap; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64 Primary; - static const uptr PrimaryRegionSizeLog = 28U; - typedef u32 PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + static const uptr RegionSizeLog = 28U; + typedef u32 CompactPtrT; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const uptr GroupSizeLog = 20U; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; #else - typedef SizeClassAllocator32 Primary; - static const uptr PrimaryRegionSizeLog = 18U; - typedef uptr PrimaryCompactPtrT; + static const uptr RegionSizeLog = 18U; + static const uptr GroupSizeLog = 18U; + typedef uptr CompactPtrT; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; - static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; - - typedef MapAllocatorCache SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 256U; - static const u32 SecondaryCacheQuarantineSize = 32U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 32U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 2UL << 20; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 1000; - - template - using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. -}; - -struct AndroidSvelteConfig { - using SizeClassMap = SvelteSizeClassMap; - static const bool MaySupportMemoryTagging = false; - + static const s32 MinReleaseToOsIntervalMs = 1000; + static const s32 MaxReleaseToOsIntervalMs = 1000; + }; #if SCUDO_CAN_USE_PRIMARY64 - typedef SizeClassAllocator64 Primary; - static const uptr PrimaryRegionSizeLog = 27U; - typedef u32 PrimaryCompactPtrT; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; + template using PrimaryT = SizeClassAllocator64; #else - typedef SizeClassAllocator32 Primary; - static const uptr PrimaryRegionSizeLog = 16U; - typedef uptr PrimaryCompactPtrT; + template using PrimaryT = SizeClassAllocator32; #endif - static const s32 PrimaryMinReleaseToOsIntervalMs = 1000; - static const s32 PrimaryMaxReleaseToOsIntervalMs = 1000; - typedef MapAllocatorCache SecondaryCache; - static const u32 SecondaryCacheEntriesArraySize = 16U; - static const u32 SecondaryCacheQuarantineSize = 32U; - static const u32 SecondaryCacheDefaultMaxEntriesCount = 4U; - static const uptr SecondaryCacheDefaultMaxEntrySize = 1UL << 18; - static const s32 SecondaryCacheMinReleaseToOsIntervalMs = 0; - static const s32 SecondaryCacheMaxReleaseToOsIntervalMs = 0; + struct Secondary { + struct Cache { + static const u32 EntriesArraySize = 256U; + static const u32 QuarantineSize = 32U; + static const u32 DefaultMaxEntriesCount = 32U; + static const uptr DefaultMaxEntrySize = 2UL << 20; + static const s32 MinReleaseToOsIntervalMs = 0; + static const s32 MaxReleaseToOsIntervalMs = 1000; + }; + template using CacheT = MapAllocatorCache; + }; - template - using TSDRegistryT = TSDRegistrySharedT; // Shared, max 2 TSDs. 
+ template using SecondaryT = MapAllocator; }; #if SCUDO_CAN_USE_PRIMARY64 struct FuchsiaConfig { - using SizeClassMap = FuchsiaSizeClassMap; static const bool MaySupportMemoryTagging = false; - - typedef SizeClassAllocator64 Primary; - static const uptr PrimaryRegionSizeLog = 30U; - typedef u32 PrimaryCompactPtrT; - static const bool PrimaryEnableRandomOffset = true; - static const uptr PrimaryMapSizeIncrement = 1UL << 18; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - - typedef MapAllocatorNoCache SecondaryCache; template using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. + + struct Primary { + using SizeClassMap = FuchsiaSizeClassMap; +#if SCUDO_RISCV64 + // Support 39-bit VMA for riscv-64 + static const uptr RegionSizeLog = 28U; + static const uptr GroupSizeLog = 19U; +#else + static const uptr RegionSizeLog = 30U; + static const uptr GroupSizeLog = 21U; +#endif + typedef u32 CompactPtrT; + static const bool EnableRandomOffset = true; + static const uptr MapSizeIncrement = 1UL << 18; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template using PrimaryT = SizeClassAllocator64; + + struct Secondary { + template using CacheT = MapAllocatorNoCache; + }; + template using SecondaryT = MapAllocator; }; struct TrustyConfig { - using SizeClassMap = TrustySizeClassMap; - static const bool MaySupportMemoryTagging = false; - - typedef SizeClassAllocator64 Primary; - // Some apps have 1 page of heap total so small regions are necessary. - static const uptr PrimaryRegionSizeLog = 10U; - typedef u32 PrimaryCompactPtrT; - static const bool PrimaryEnableRandomOffset = false; - // Trusty is extremely memory-constrained so minimally round up map calls. - static const uptr PrimaryMapSizeIncrement = 1UL << 4; - static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; - static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - - typedef MapAllocatorNoCache SecondaryCache; + static const bool MaySupportMemoryTagging = true; template using TSDRegistryT = TSDRegistrySharedT; // Shared, max 1 TSD. 
+ + struct Primary { + using SizeClassMap = TrustySizeClassMap; + static const uptr RegionSizeLog = 28U; + static const uptr GroupSizeLog = 20U; + typedef u32 CompactPtrT; + static const bool EnableRandomOffset = false; + static const uptr MapSizeIncrement = 1UL << 12; + static const uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const s32 MaxReleaseToOsIntervalMs = INT32_MAX; + }; + template using PrimaryT = SizeClassAllocator64; + + struct Secondary { + template using CacheT = MapAllocatorNoCache; + }; + + template using SecondaryT = MapAllocator; }; #endif +#ifndef SCUDO_USE_CUSTOM_CONFIG + #if SCUDO_ANDROID typedef AndroidConfig Config; #elif SCUDO_FUCHSIA @@ -199,6 +273,8 @@ typedef TrustyConfig Config; typedef DefaultConfig Config; #endif +#endif // SCUDO_USE_CUSTOM_CONFIG + } // namespace scudo #endif // SCUDO_ALLOCATOR_CONFIG_H_ diff --git a/third_party/llvm-scudo-standalone/atomic_helpers.h b/third_party/llvm-scudo-standalone/atomic_helpers.h index d88f5d7..a68ffd1 100644 --- a/third_party/llvm-scudo-standalone/atomic_helpers.h +++ b/third_party/llvm-scudo-standalone/atomic_helpers.h @@ -133,10 +133,10 @@ inline void atomic_store_relaxed(volatile T *A, typename T::Type V) { } template -inline typename T::Type atomic_compare_exchange(volatile T *A, - typename T::Type Cmp, - typename T::Type Xchg) { - atomic_compare_exchange_strong(A, &Cmp, Xchg, memory_order_acquire); +inline typename T::Type +atomic_compare_exchange_strong(volatile T *A, typename T::Type Cmp, + typename T::Type Xchg, memory_order MO) { + atomic_compare_exchange_strong(A, &Cmp, Xchg, MO); return Cmp; } diff --git a/third_party/llvm-scudo-standalone/benchmarks/malloc_benchmark.cpp b/third_party/llvm-scudo-standalone/benchmarks/malloc_benchmark.cpp index 2adec88..4fb05b7 100644 --- a/third_party/llvm-scudo-standalone/benchmarks/malloc_benchmark.cpp +++ b/third_party/llvm-scudo-standalone/benchmarks/malloc_benchmark.cpp @@ -52,8 +52,6 @@ static const size_t MaxSize = 128 * 1024; // cleanly. BENCHMARK_TEMPLATE(BM_malloc_free, scudo::AndroidConfig) ->Range(MinSize, MaxSize); -BENCHMARK_TEMPLATE(BM_malloc_free, scudo::AndroidSvelteConfig) - ->Range(MinSize, MaxSize); #if SCUDO_CAN_USE_PRIMARY64 BENCHMARK_TEMPLATE(BM_malloc_free, scudo::FuchsiaConfig) ->Range(MinSize, MaxSize); @@ -99,8 +97,6 @@ static const size_t MaxIters = 32 * 1024; // cleanly. 
BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::AndroidConfig) ->Range(MinIters, MaxIters); -BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::AndroidSvelteConfig) - ->Range(MinIters, MaxIters); #if SCUDO_CAN_USE_PRIMARY64 BENCHMARK_TEMPLATE(BM_malloc_free_loop, scudo::FuchsiaConfig) ->Range(MinIters, MaxIters); diff --git a/third_party/llvm-scudo-standalone/checksum.cpp b/third_party/llvm-scudo-standalone/checksum.cpp index 05d4ba5..2c27739 100644 --- a/third_party/llvm-scudo-standalone/checksum.cpp +++ b/third_party/llvm-scudo-standalone/checksum.cpp @@ -8,6 +8,7 @@ #include "checksum.h" #include "atomic_helpers.h" +#include "chunk.h" #if defined(__x86_64__) || defined(__i386__) #include diff --git a/third_party/llvm-scudo-standalone/checksum.h b/third_party/llvm-scudo-standalone/checksum.h index a63b1b4..f8eda81 100644 --- a/third_party/llvm-scudo-standalone/checksum.h +++ b/third_party/llvm-scudo-standalone/checksum.h @@ -12,12 +12,17 @@ #include "internal_defs.h" // Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 +// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2) // - for ARM & AArch64: -march=armv8-a+crc or -mcrc // An additional check must be performed at runtime as well to make sure the // emitted instructions are valid on the target host. -#ifdef __SSE4_2__ +#if defined(__CRC32__) +// NB: clang has but GCC does not +#include +#define CRC32_INTRINSIC \ + FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di) +#elif defined(__SSE4_2__) #include #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) #endif diff --git a/third_party/llvm-scudo-standalone/chunk.h b/third_party/llvm-scudo-standalone/chunk.h index 69b8e1b..9228df0 100644 --- a/third_party/llvm-scudo-standalone/chunk.h +++ b/third_party/llvm-scudo-standalone/chunk.h @@ -25,7 +25,7 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { // as opposed to only for crc32_hw.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a result // there is no reason to not use it here. 
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 Crc = static_cast(CRC32_INTRINSIC(Seed, Value)); for (uptr I = 0; I < ArraySize; I++) Crc = static_cast(CRC32_INTRINSIC(Crc, Array[I])); @@ -42,7 +42,8 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { Checksum = computeBSDChecksum(Checksum, Array[I]); return Checksum; } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || + // defined(__ARM_FEATURE_CRC32) } namespace Chunk { @@ -84,7 +85,7 @@ constexpr uptr OffsetMask = (1UL << 16) - 1; constexpr uptr ChecksumMask = (1UL << 16) - 1; constexpr uptr getHeaderSize() { - return roundUpTo(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); + return roundUp(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); } inline AtomicPackedHeader *getAtomicHeader(void *Ptr) { @@ -127,19 +128,6 @@ inline void loadHeader(u32 Cookie, const void *Ptr, reportHeaderCorruption(const_cast(Ptr)); } -inline void compareExchangeHeader(u32 Cookie, void *Ptr, - UnpackedHeader *NewUnpackedHeader, - UnpackedHeader *OldUnpackedHeader) { - NewUnpackedHeader->Checksum = - computeHeaderChecksum(Cookie, Ptr, NewUnpackedHeader); - PackedHeader NewPackedHeader = bit_cast(*NewUnpackedHeader); - PackedHeader OldPackedHeader = bit_cast(*OldUnpackedHeader); - if (UNLIKELY(!atomic_compare_exchange_strong( - getAtomicHeader(Ptr), &OldPackedHeader, NewPackedHeader, - memory_order_relaxed))) - reportHeaderRace(Ptr); -} - inline bool isValid(u32 Cookie, const void *Ptr, UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); diff --git a/third_party/llvm-scudo-standalone/combined.h b/third_party/llvm-scudo-standalone/combined.h index fd5360c..4624f83 100644 --- a/third_party/llvm-scudo-standalone/combined.h +++ b/third_party/llvm-scudo-standalone/combined.h @@ -14,6 +14,7 @@ #include "flags.h" #include "flags_parser.h" #include "local_cache.h" +#include "mem_map.h" #include "memtag.h" #include "options.h" #include "quarantine.h" @@ -42,13 +43,14 @@ extern "C" size_t android_unsafe_frame_pointer_chase(scudo::uptr *buf, namespace scudo { -template +template class Allocator { public: - using PrimaryT = typename Params::Primary; + using PrimaryT = typename Config::template PrimaryT; + using SecondaryT = typename Config::template SecondaryT; using CacheT = typename PrimaryT::CacheT; - typedef Allocator ThisT; - typedef typename Params::template TSDRegistryT TSDRegistryT; + typedef Allocator ThisT; + typedef typename Config::template TSDRegistryT TSDRegistryT; void callPostInitCallback() { pthread_once(&PostInitNonce, PostInitCallback); @@ -66,14 +68,13 @@ class Allocator { if (UNLIKELY(Header.State != Chunk::State::Quarantined)) reportInvalidChunkState(AllocatorAction::Recycling, Ptr); - Chunk::UnpackedHeader NewHeader = Header; - NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + Header.State = Chunk::State::Available; + Chunk::storeHeader(Allocator.Cookie, Ptr, &Header); - if (allocatorSupportsMemoryTagging()) + if (allocatorSupportsMemoryTagging()) Ptr = untagPointer(Ptr); - void *BlockBegin = Allocator::getBlockBegin(Ptr, &NewHeader); - Cache.deallocate(NewHeader.ClassId, BlockBegin); + void *BlockBegin = Allocator::getBlockBegin(Ptr, &Header); + Cache.deallocate(Header.ClassId, BlockBegin); } // We take a shortcut when 
allocating a quarantine batch by working with the @@ -97,7 +98,7 @@ class Allocator { // Reset tag to 0 as this chunk may have been previously used for a tagged // user allocation. - if (UNLIKELY(useMemoryTagging(Allocator.Primary.Options.load()))) + if (UNLIKELY(useMemoryTagging(Allocator.Primary.Options.load()))) storeTags(reinterpret_cast(Ptr), reinterpret_cast(Ptr) + sizeof(QuarantineBatch)); @@ -116,9 +117,8 @@ class Allocator { DCHECK_EQ(Header.Offset, 0); DCHECK_EQ(Header.SizeOrUnusedBytes, sizeof(QuarantineBatch)); - Chunk::UnpackedHeader NewHeader = Header; - NewHeader.State = Chunk::State::Available; - Chunk::compareExchangeHeader(Allocator.Cookie, Ptr, &NewHeader, &Header); + Header.State = Chunk::State::Available; + Chunk::storeHeader(Allocator.Cookie, Ptr, &Header); Cache.deallocate(QuarantineClassId, reinterpret_cast(reinterpret_cast(Ptr) - Chunk::getHeaderSize())); @@ -158,10 +158,9 @@ class Allocator { Primary.Options.set(OptionBit::DeallocTypeMismatch); if (getFlags()->delete_size_mismatch) Primary.Options.set(OptionBit::DeleteSizeMismatch); - if (allocatorSupportsMemoryTagging() && + if (allocatorSupportsMemoryTagging() && systemSupportsMemoryTagging()) Primary.Options.set(OptionBit::UseMemoryTagging); - Primary.Options.set(OptionBit::UseOddEvenTags); QuarantineMaxChunkSize = static_cast(getFlags()->quarantine_max_chunk_size); @@ -173,6 +172,8 @@ class Allocator { Quarantine.init( static_cast(getFlags()->quarantine_size_kb << 10), static_cast(getFlags()->thread_local_quarantine_size_kb << 10)); + + mapAndInitializeRingBuffer(); } // Initialize the embedded GWP-ASan instance. Requires the main allocator to @@ -185,6 +186,7 @@ class Allocator { getFlags()->GWP_ASAN_MaxSimultaneousAllocations; Opt.SampleRate = getFlags()->GWP_ASAN_SampleRate; Opt.InstallSignalHandlers = getFlags()->GWP_ASAN_InstallSignalHandlers; + Opt.Recoverable = getFlags()->GWP_ASAN_Recoverable; // Embedded GWP-ASan is locked through the Scudo atfork handler (via // Allocator::disable calling GWPASan.disable). Disable GWP-ASan's atfork // handler. @@ -196,7 +198,8 @@ class Allocator { gwp_asan::segv_handler::installSignalHandlers( &GuardedAlloc, Printf, gwp_asan::backtrace::getPrintBacktraceFunction(), - gwp_asan::backtrace::getSegvBacktraceFunction()); + gwp_asan::backtrace::getSegvBacktraceFunction(), + Opt.Recoverable); GuardedAllocSlotSize = GuardedAlloc.getAllocatorState()->maximumAllocationSize(); @@ -205,11 +208,22 @@ class Allocator { #endif // GWP_ASAN_HOOKS } +#ifdef GWP_ASAN_HOOKS + const gwp_asan::AllocationMetadata *getGwpAsanAllocationMetadata() { + return GuardedAlloc.getMetadataRegion(); + } + + const gwp_asan::AllocatorState *getGwpAsanAllocatorState() { + return GuardedAlloc.getAllocatorState(); + } +#endif // GWP_ASAN_HOOKS + ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) { TSDRegistry.initThreadMaybe(this, MinimalInit); } void unmapTestOnly() { + unmapRingBuffer(); TSDRegistry.unmapTestOnly(this); Primary.unmapTestOnly(); Secondary.unmapTestOnly(); @@ -221,6 +235,7 @@ class Allocator { } TSDRegistryT *getTSDRegistry() { return &TSDRegistry; } + QuarantineT *getQuarantine() { return &Quarantine; } // The Cache must be provided zero-initialized. void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); } @@ -231,13 +246,22 @@ class Allocator { // - unlinking the local stats from the global ones (destroying the cache does // the last two items). 
void commitBack(TSD *TSD) { - Quarantine.drain(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache)); - TSD->Cache.destroy(&Stats); + TSD->assertLocked(/*BypassCheck=*/true); + Quarantine.drain(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().destroy(&Stats); + } + + void drainCache(TSD *TSD) { + TSD->assertLocked(/*BypassCheck=*/true); + Quarantine.drainAndRecycle(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache())); + TSD->getCache().drain(); } + void drainCaches() { TSDRegistry.drainCaches(this); } ALWAYS_INLINE void *getHeaderTaggedPointer(void *Ptr) { - if (!allocatorSupportsMemoryTagging()) + if (!allocatorSupportsMemoryTagging()) return Ptr; auto UntaggedPtr = untagPointer(Ptr); if (UntaggedPtr != Ptr) @@ -249,7 +273,7 @@ class Allocator { } ALWAYS_INLINE uptr addHeaderTag(uptr Ptr) { - if (!allocatorSupportsMemoryTagging()) + if (!allocatorSupportsMemoryTagging()) return Ptr; return addFixedTag(Ptr, 2); } @@ -272,7 +296,7 @@ class Allocator { #endif } - uptr computeOddEvenMaskForPointerMaybe(Options Options, uptr Ptr, + uptr computeOddEvenMaskForPointerMaybe(const Options &Options, uptr Ptr, uptr ClassId) { if (!Options.get(OptionBit::UseOddEvenTags)) return 0; @@ -287,7 +311,7 @@ class Allocator { NOINLINE void *allocate(uptr Size, Chunk::Origin Origin, uptr Alignment = MinAlignment, - bool ZeroContents = false) { + bool ZeroContents = false) NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); const Options Options = Primary.Options.load(); @@ -302,8 +326,6 @@ class Allocator { #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.shouldSample())) { if (void *Ptr = GuardedAlloc.allocate(Size, Alignment)) { - if (UNLIKELY(&__scudo_allocate_hook)) - __scudo_allocate_hook(Ptr, Size); Stats.lock(); Stats.add(StatAllocated, GuardedAllocSlotSize); Stats.sub(StatFree, GuardedAllocSlotSize); @@ -324,7 +346,7 @@ class Allocator { // to be sure that there will be an address in the block that will satisfy // the alignment. const uptr NeededSize = - roundUpTo(Size, MinAlignment) + + roundUp(Size, MinAlignment) + ((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize()); // Takes care of extravagantly large sizes as well as integer overflows. @@ -344,33 +366,35 @@ class Allocator { DCHECK_NE(ClassId, 0U); bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Block = TSD->Cache.allocate(ClassId); - // If the allocation failed, the most likely reason with a 32-bit primary - // is the region being full. In that event, retry in each successively - // larger class until it fits. If it fails to fit in the largest class, - // fallback to the Secondary. + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + Block = TSD->getCache().allocate(ClassId); + // If the allocation failed, retry in each successively larger class until + // it fits. If it fails to fit in the largest class, fallback to the + // Secondary. 
if (UNLIKELY(!Block)) { while (ClassId < SizeClassMap::LargestClassId && !Block) - Block = TSD->Cache.allocate(++ClassId); + Block = TSD->getCache().allocate(++ClassId); if (!Block) ClassId = 0; } if (UnlockRequired) TSD->unlock(); } - if (UNLIKELY(ClassId == 0)) + if (UNLIKELY(ClassId == 0)) { Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd, FillContents); + } if (UNLIKELY(!Block)) { if (Options.get(OptionBit::MayReturnNull)) return nullptr; + printStats(); reportOutOfMemory(NeededSize); } const uptr BlockUptr = reinterpret_cast(Block); const uptr UnalignedUserPtr = BlockUptr + Chunk::getHeaderSize(); - const uptr UserPtr = roundUpTo(UnalignedUserPtr, Alignment); + const uptr UserPtr = roundUp(UnalignedUserPtr, Alignment); void *Ptr = reinterpret_cast(UserPtr); void *TaggedPtr = Ptr; @@ -386,7 +410,7 @@ class Allocator { // // When memory tagging is enabled, zeroing the contents is done as part of // setting the tag. - if (UNLIKELY(useMemoryTagging(Options))) { + if (UNLIKELY(useMemoryTagging(Options))) { uptr PrevUserPtr; Chunk::UnpackedHeader Header; const uptr BlockSize = PrimaryT::getSizeByClassId(ClassId); @@ -429,7 +453,7 @@ class Allocator { PrevUserPtr == UserPtr && (TaggedUserPtr = loadTag(UserPtr)) != UserPtr) { uptr PrevEnd = TaggedUserPtr + Header.SizeOrUnusedBytes; - const uptr NextPage = roundUpTo(TaggedUserPtr, getPageSizeCached()); + const uptr NextPage = roundUp(TaggedUserPtr, getPageSizeCached()); if (NextPage < PrevEnd && loadTag(NextPage) != NextPage) PrevEnd = NextPage; TaggedPtr = reinterpret_cast(TaggedUserPtr); @@ -442,8 +466,8 @@ class Allocator { // was freed, it would not have been retagged and thus zeroed, and // therefore it needs to be zeroed now. memset(TaggedPtr, 0, - Min(Size, roundUpTo(PrevEnd - TaggedUserPtr, - archMemoryTagGranuleSize()))); + Min(Size, roundUp(PrevEnd - TaggedUserPtr, + archMemoryTagGranuleSize()))); } else if (Size) { // Clear any stack metadata that may have previously been stored in // the chunk data. @@ -468,7 +492,7 @@ class Allocator { } else { Block = addHeaderTag(Block); Ptr = addHeaderTag(Ptr); - if (UNLIKELY(useMemoryTagging(Options))) { + if (UNLIKELY(useMemoryTagging(Options))) { storeTags(reinterpret_cast(Block), reinterpret_cast(Ptr)); storeSecondaryAllocationStackMaybe(Options, Ptr, Size); } @@ -494,14 +518,14 @@ class Allocator { Chunk::SizeOrUnusedBytesMask; Chunk::storeHeader(Cookie, Ptr, &Header); - if (UNLIKELY(&__scudo_allocate_hook)) - __scudo_allocate_hook(TaggedPtr, Size); - return TaggedPtr; } NOINLINE void deallocate(void *Ptr, Chunk::Origin Origin, uptr DeleteSize = 0, UNUSED uptr Alignment = MinAlignment) { + if (UNLIKELY(!Ptr)) + return; + // For a deallocation, we only ensure minimal initialization, meaning thread // local data will be left uninitialized for now (when using ELF TLS). The // fallback cache will be used instead. This is a workaround for a situation @@ -510,12 +534,6 @@ class Allocator { // being destroyed properly. Any other heap operation will do a full init. 
initThreadMaybe(/*MinimalInit=*/true); - if (UNLIKELY(&__scudo_deallocate_hook)) - __scudo_deallocate_hook(Ptr); - - if (UNLIKELY(!Ptr)) - return; - #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) { GuardedAlloc.deallocate(Ptr); @@ -594,48 +612,47 @@ class Allocator { if (UNLIKELY(!isAligned(reinterpret_cast(OldPtr), MinAlignment))) reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr); - Chunk::UnpackedHeader OldHeader; - Chunk::loadHeader(Cookie, OldPtr, &OldHeader); + Chunk::UnpackedHeader Header; + Chunk::loadHeader(Cookie, OldPtr, &Header); - if (UNLIKELY(OldHeader.State != Chunk::State::Allocated)) + if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Reallocating, OldPtr); // Pointer has to be allocated with a malloc-type function. Some // applications think that it is OK to realloc a memalign'ed pointer, which // will trigger this check. It really isn't. if (Options.get(OptionBit::DeallocTypeMismatch)) { - if (UNLIKELY(OldHeader.OriginOrWasZeroed != Chunk::Origin::Malloc)) + if (UNLIKELY(Header.OriginOrWasZeroed != Chunk::Origin::Malloc)) reportDeallocTypeMismatch(AllocatorAction::Reallocating, OldPtr, - OldHeader.OriginOrWasZeroed, + Header.OriginOrWasZeroed, Chunk::Origin::Malloc); } - void *BlockBegin = getBlockBegin(OldTaggedPtr, &OldHeader); + void *BlockBegin = getBlockBegin(OldTaggedPtr, &Header); uptr BlockEnd; uptr OldSize; - const uptr ClassId = OldHeader.ClassId; + const uptr ClassId = Header.ClassId; if (LIKELY(ClassId)) { BlockEnd = reinterpret_cast(BlockBegin) + SizeClassMap::getSizeByClassId(ClassId); - OldSize = OldHeader.SizeOrUnusedBytes; + OldSize = Header.SizeOrUnusedBytes; } else { BlockEnd = SecondaryT::getBlockEnd(BlockBegin); OldSize = BlockEnd - (reinterpret_cast(OldTaggedPtr) + - OldHeader.SizeOrUnusedBytes); + Header.SizeOrUnusedBytes); } // If the new chunk still fits in the previously allocated block (with a // reasonable delta), we just keep the old block, and update the chunk // header to reflect the size change. if (reinterpret_cast(OldTaggedPtr) + NewSize <= BlockEnd) { if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) { - Chunk::UnpackedHeader NewHeader = OldHeader; - NewHeader.SizeOrUnusedBytes = + Header.SizeOrUnusedBytes = (ClassId ? NewSize : BlockEnd - (reinterpret_cast(OldTaggedPtr) + NewSize)) & Chunk::SizeOrUnusedBytesMask; - Chunk::compareExchangeHeader(Cookie, OldPtr, &NewHeader, &OldHeader); - if (UNLIKELY(useMemoryTagging(Options))) { + Chunk::storeHeader(Cookie, OldPtr, &Header); + if (UNLIKELY(useMemoryTagging(Options))) { if (ClassId) { resizeTaggedChunk(reinterpret_cast(OldTaggedPtr) + OldSize, reinterpret_cast(OldTaggedPtr) + NewSize, @@ -656,7 +673,7 @@ class Allocator { void *NewPtr = allocate(NewSize, Chunk::Origin::Malloc, Alignment); if (LIKELY(NewPtr)) { memcpy(NewPtr, OldTaggedPtr, Min(NewSize, OldSize)); - quarantineOrDeallocateChunk(Options, OldTaggedPtr, &OldHeader, OldSize); + quarantineOrDeallocateChunk(Options, OldTaggedPtr, &Header, OldSize); } return NewPtr; } @@ -664,7 +681,7 @@ class Allocator { // TODO(kostyak): disable() is currently best-effort. There are some small // windows of time when an allocation could still succeed after // this function finishes. We will revisit that later. 
- void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); #ifdef GWP_ASAN_HOOKS GuardedAlloc.disable(); @@ -676,7 +693,7 @@ class Allocator { Secondary.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { initThreadMaybe(); Secondary.enable(); Primary.enable(); @@ -695,9 +712,7 @@ class Allocator { // sizing purposes. uptr getStats(char *Buffer, uptr Size) { ScopedString Str; - disable(); const uptr Length = getStats(&Str) + 1; - enable(); if (Length < Size) Size = Length; if (Buffer && Size) { @@ -709,15 +724,22 @@ class Allocator { void printStats() { ScopedString Str; - disable(); getStats(&Str); - enable(); Str.output(); } - void releaseToOS() { + void printFragmentationInfo() { + ScopedString Str; + Primary.getFragmentationInfo(&Str); + // Secondary allocator dumps the fragmentation data in getStats(). + Str.output(); + } + + void releaseToOS(ReleaseToOS ReleaseType) { initThreadMaybe(); - Primary.releaseToOS(); + if (ReleaseType == ReleaseToOS::ForceAll) + drainCaches(); + Primary.releaseToOS(ReleaseType); Secondary.releaseToOS(); } @@ -731,7 +753,7 @@ class Allocator { Base = untagPointer(Base); const uptr From = Base; const uptr To = Base + Size; - bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging() && + bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging() && systemSupportsMemoryTagging(); auto Lambda = [this, From, To, MayHaveTaggedPrimary, Callback, Arg](uptr Block) { @@ -753,9 +775,9 @@ class Allocator { } if (Header.State == Chunk::State::Allocated) { uptr TaggedChunk = Chunk; - if (allocatorSupportsMemoryTagging()) + if (allocatorSupportsMemoryTagging()) TaggedChunk = untagPointer(TaggedChunk); - if (useMemoryTagging(Primary.Options.load())) + if (useMemoryTagging(Primary.Options.load())) TaggedChunk = loadTag(Chunk); Callback(TaggedChunk, getSize(reinterpret_cast(Chunk), &Header), Arg); @@ -806,10 +828,15 @@ class Allocator { // for it, which then forces realloc to copy the usable size of a chunk as // opposed to its actual size. uptr getUsableSize(const void *Ptr) { - initThreadMaybe(); if (UNLIKELY(!Ptr)) return 0; + return getAllocSize(Ptr); + } + + uptr getAllocSize(const void *Ptr) { + initThreadMaybe(); + #ifdef GWP_ASAN_HOOKS if (UNLIKELY(GuardedAlloc.pointerIsMine(Ptr))) return GuardedAlloc.getSize(Ptr); @@ -818,9 +845,11 @@ class Allocator { Ptr = getHeaderTaggedPointer(const_cast(Ptr)); Chunk::UnpackedHeader Header; Chunk::loadHeader(Cookie, Ptr, &Header); - // Getting the usable size of a chunk only makes sense if it's allocated. + + // Getting the alloc size of a chunk only makes sense if it's allocated. if (UNLIKELY(Header.State != Chunk::State::Allocated)) reportInvalidChunkState(AllocatorAction::Sizing, const_cast(Ptr)); + return getSize(Ptr, &Header); } @@ -847,7 +876,7 @@ class Allocator { } bool useMemoryTaggingTestOnly() const { - return useMemoryTagging(Primary.Options.load()); + return useMemoryTagging(Primary.Options.load()); } void disableMemoryTagging() { // If we haven't been initialized yet, we need to initialize now in order to @@ -857,7 +886,7 @@ class Allocator { // callback), which may cause mappings to be created with memory tagging // enabled. 
TSDRegistry.initOnceMaybe(this); - if (allocatorSupportsMemoryTagging()) { + if (allocatorSupportsMemoryTagging()) { Secondary.disableMemoryTagging(); Primary.Options.clear(OptionBit::UseMemoryTagging); } @@ -865,6 +894,10 @@ class Allocator { void setTrackAllocationStacks(bool Track) { initThreadMaybe(); + if (getFlags()->allocation_ring_buffer_size <= 0) { + DCHECK(!Primary.Options.load().get(OptionBit::TrackAllocationStacks)); + return; + } if (Track) Primary.Options.set(OptionBit::TrackAllocationStacks); else @@ -896,11 +929,15 @@ class Allocator { return PrimaryT::getRegionInfoArraySize(); } - const char *getRingBufferAddress() const { - return reinterpret_cast(&RingBuffer); + const char *getRingBufferAddress() { + initThreadMaybe(); + return RawRingBuffer; } - static uptr getRingBufferSize() { return sizeof(RingBuffer); } + uptr getRingBufferSize() { + initThreadMaybe(); + return RingBufferElements ? ringBufferSizeInBytes(RingBufferElements) : 0; + } static const uptr MaxTraceSize = 64; @@ -910,16 +947,17 @@ class Allocator { if (!Depot->find(Hash, &RingPos, &Size)) return; for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I) - Trace[I] = (*Depot)[RingPos + I]; + Trace[I] = static_cast((*Depot)[RingPos + I]); } static void getErrorInfo(struct scudo_error_info *ErrorInfo, uintptr_t FaultAddr, const char *DepotPtr, const char *RegionInfoPtr, const char *RingBufferPtr, - const char *Memory, const char *MemoryTags, - uintptr_t MemoryAddr, size_t MemorySize) { + size_t RingBufferSize, const char *Memory, + const char *MemoryTags, uintptr_t MemoryAddr, + size_t MemorySize) { *ErrorInfo = {}; - if (!allocatorSupportsMemoryTagging() || + if (!allocatorSupportsMemoryTagging() || MemoryAddr + MemorySize < MemoryAddr) return; @@ -936,7 +974,7 @@ class Allocator { // Check the ring buffer. For primary allocations this will only find UAF; // for secondary allocations we can find either UAF or OOB. getRingBufferErrorInfo(ErrorInfo, NextErrorReport, FaultAddr, Depot, - RingBufferPtr); + RingBufferPtr, RingBufferSize); // Check for OOB in the 28 blocks surrounding the 3 we checked earlier. // Beyond that we are likely to hit false positives. @@ -947,7 +985,6 @@ class Allocator { } private: - using SecondaryT = MapAllocator; typedef typename PrimaryT::SizeClassMap SizeClassMap; static const uptr MinAlignmentLog = SCUDO_MIN_ALIGNMENT_LOG; @@ -959,7 +996,7 @@ class Allocator { static_assert(MinAlignment >= sizeof(Chunk::PackedHeader), "Minimal alignment must at least cover a chunk header."); - static_assert(!allocatorSupportsMemoryTagging() || + static_assert(!allocatorSupportsMemoryTagging() || MinAlignment >= archMemoryTagGranuleSize(), ""); @@ -1003,14 +1040,14 @@ class Allocator { }; atomic_uptr Pos; -#ifdef SCUDO_FUZZ - static const uptr NumEntries = 2; -#else - static const uptr NumEntries = 32768; -#endif - Entry Entries[NumEntries]; + // An array of Size (at least one) elements of type Entry is immediately + // following to this struct. }; - AllocationRingBuffer RingBuffer = {}; + // Pointer to memory mapped area starting with AllocationRingBuffer struct, + // and immediately followed by Size elements of type Entry. + char *RawRingBuffer = {}; + u32 RingBufferElements = 0; + MemMapT RawRingBufferMap; // The following might get optimized out by the compiler. 
NOINLINE void performSanityChecks() { @@ -1059,40 +1096,40 @@ class Allocator { const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; - if (allocatorSupportsMemoryTagging()) + if (allocatorSupportsMemoryTagging()) Ptr = untagPointer(const_cast(Ptr)); return SecondaryT::getBlockEnd(getBlockBegin(Ptr, Header)) - reinterpret_cast(Ptr) - SizeOrUnusedBytes; } - void quarantineOrDeallocateChunk(Options Options, void *TaggedPtr, - Chunk::UnpackedHeader *Header, uptr Size) { + void quarantineOrDeallocateChunk(const Options &Options, void *TaggedPtr, + Chunk::UnpackedHeader *Header, + uptr Size) NO_THREAD_SAFETY_ANALYSIS { void *Ptr = getHeaderTaggedPointer(TaggedPtr); - Chunk::UnpackedHeader NewHeader = *Header; // If the quarantine is disabled, the actual size of a chunk is 0 or larger // than the maximum allowed, we return a chunk directly to the backend. // This purposefully underflows for Size == 0. const bool BypassQuarantine = !Quarantine.getCacheSize() || ((Size - 1) >= QuarantineMaxChunkSize) || - !NewHeader.ClassId; + !Header->ClassId; if (BypassQuarantine) - NewHeader.State = Chunk::State::Available; + Header->State = Chunk::State::Available; else - NewHeader.State = Chunk::State::Quarantined; - NewHeader.OriginOrWasZeroed = useMemoryTagging(Options) && - NewHeader.ClassId && - !TSDRegistry.getDisableMemInit(); - Chunk::compareExchangeHeader(Cookie, Ptr, &NewHeader, Header); + Header->State = Chunk::State::Quarantined; + Header->OriginOrWasZeroed = useMemoryTagging(Options) && + Header->ClassId && + !TSDRegistry.getDisableMemInit(); + Chunk::storeHeader(Cookie, Ptr, Header); - if (UNLIKELY(useMemoryTagging(Options))) { + if (UNLIKELY(useMemoryTagging(Options))) { u8 PrevTag = extractTag(reinterpret_cast(TaggedPtr)); storeDeallocationStackMaybe(Options, Ptr, PrevTag, Size); - if (NewHeader.ClassId) { + if (Header->ClassId) { if (!TSDRegistry.getDisableMemInit()) { uptr TaggedBegin, TaggedEnd; const uptr OddEvenMask = computeOddEvenMaskForPointerMaybe( - Options, reinterpret_cast(getBlockBegin(Ptr, &NewHeader)), - NewHeader.ClassId); + Options, reinterpret_cast(getBlockBegin(Ptr, Header)), + Header->ClassId); // Exclude the previous tag so that immediate use after free is // detected 100% of the time. setRandomTag(Ptr, Size, OddEvenMask | (1UL << PrevTag), &TaggedBegin, @@ -1101,18 +1138,26 @@ class Allocator { } } if (BypassQuarantine) { - if (allocatorSupportsMemoryTagging()) + if (allocatorSupportsMemoryTagging()) Ptr = untagPointer(Ptr); - void *BlockBegin = getBlockBegin(Ptr, &NewHeader); - const uptr ClassId = NewHeader.ClassId; + void *BlockBegin = getBlockBegin(Ptr, Header); + const uptr ClassId = Header->ClassId; if (LIKELY(ClassId)) { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - TSD->Cache.deallocate(ClassId, BlockBegin); + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + const bool CacheDrained = + TSD->getCache().deallocate(ClassId, BlockBegin); if (UnlockRequired) TSD->unlock(); + // When we have drained some blocks back to the Primary from TSD, that + // implies that we may have the chance to release some pages as well. + // Note that in order not to block other thread's accessing the TSD, + // release the TSD first then try the page release. 
+ if (CacheDrained) + Primary.tryReleaseToOS(ClassId, ReleaseToOS::Normal); } else { - if (UNLIKELY(useMemoryTagging(Options))) + if (UNLIKELY(useMemoryTagging(Options))) storeTags(reinterpret_cast(BlockBegin), reinterpret_cast(Ptr)); Secondary.deallocate(Options, BlockBegin); @@ -1120,8 +1165,9 @@ class Allocator { } else { bool UnlockRequired; auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired); - Quarantine.put(&TSD->QuarantineCache, - QuarantineCallback(*this, TSD->Cache), Ptr, Size); + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + Quarantine.put(&TSD->getQuarantineCache(), + QuarantineCallback(*this, TSD->getCache()), Ptr, Size); if (UnlockRequired) TSD->unlock(); } @@ -1181,22 +1227,22 @@ class Allocator { void resizeTaggedChunk(uptr OldPtr, uptr NewPtr, uptr NewSize, uptr BlockEnd) { - uptr RoundOldPtr = roundUpTo(OldPtr, archMemoryTagGranuleSize()); + uptr RoundOldPtr = roundUp(OldPtr, archMemoryTagGranuleSize()); uptr RoundNewPtr; if (RoundOldPtr >= NewPtr) { // If the allocation is shrinking we just need to set the tag past the end // of the allocation to 0. See explanation in storeEndMarker() above. - RoundNewPtr = roundUpTo(NewPtr, archMemoryTagGranuleSize()); + RoundNewPtr = roundUp(NewPtr, archMemoryTagGranuleSize()); } else { // Set the memory tag of the region - // [RoundOldPtr, roundUpTo(NewPtr, archMemoryTagGranuleSize())) + // [RoundOldPtr, roundUp(NewPtr, archMemoryTagGranuleSize())) // to the pointer tag stored in OldPtr. RoundNewPtr = storeTags(RoundOldPtr, NewPtr); } storeEndMarker(RoundNewPtr, NewSize, BlockEnd); } - void storePrimaryAllocationStackMaybe(Options Options, void *Ptr) { + void storePrimaryAllocationStackMaybe(const Options &Options, void *Ptr) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; auto *Ptr32 = reinterpret_cast(Ptr); @@ -1207,9 +1253,9 @@ class Allocator { void storeRingBufferEntry(void *Ptr, u32 AllocationTrace, u32 AllocationTid, uptr AllocationSize, u32 DeallocationTrace, u32 DeallocationTid) { - uptr Pos = atomic_fetch_add(&RingBuffer.Pos, 1, memory_order_relaxed); + uptr Pos = atomic_fetch_add(&getRingBuffer()->Pos, 1, memory_order_relaxed); typename AllocationRingBuffer::Entry *Entry = - &RingBuffer.Entries[Pos % AllocationRingBuffer::NumEntries]; + getRingBufferEntry(RawRingBuffer, Pos % RingBufferElements); // First invalidate our entry so that we don't attempt to interpret a // partially written state in getSecondaryErrorInfo(). 
The fences below @@ -1228,7 +1274,7 @@ class Allocator { atomic_store_relaxed(&Entry->Ptr, reinterpret_cast(Ptr)); } - void storeSecondaryAllocationStackMaybe(Options Options, void *Ptr, + void storeSecondaryAllocationStackMaybe(const Options &Options, void *Ptr, uptr Size) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; @@ -1243,8 +1289,8 @@ class Allocator { storeRingBufferEntry(untagPointer(Ptr), Trace, Tid, Size, 0, 0); } - void storeDeallocationStackMaybe(Options Options, void *Ptr, u8 PrevTag, - uptr Size) { + void storeDeallocationStackMaybe(const Options &Options, void *Ptr, + u8 PrevTag, uptr Size) { if (!UNLIKELY(Options.get(OptionBit::TrackAllocationStacks))) return; @@ -1261,8 +1307,8 @@ class Allocator { } static const size_t NumErrorReports = - sizeof(((scudo_error_info *)0)->reports) / - sizeof(((scudo_error_info *)0)->reports[0]); + sizeof(((scudo_error_info *)nullptr)->reports) / + sizeof(((scudo_error_info *)nullptr)->reports[0]); static void getInlineErrorInfo(struct scudo_error_info *ErrorInfo, size_t &NextErrorReport, uintptr_t FaultAddr, @@ -1350,15 +1396,19 @@ class Allocator { size_t &NextErrorReport, uintptr_t FaultAddr, const StackDepot *Depot, - const char *RingBufferPtr) { + const char *RingBufferPtr, + size_t RingBufferSize) { auto *RingBuffer = reinterpret_cast(RingBufferPtr); + size_t RingBufferElements = ringBufferElementsFromBytes(RingBufferSize); + if (!RingBuffer || RingBufferElements == 0) + return; uptr Pos = atomic_load_relaxed(&RingBuffer->Pos); - for (uptr I = Pos - 1; I != Pos - 1 - AllocationRingBuffer::NumEntries && + for (uptr I = Pos - 1; I != Pos - 1 - RingBufferElements && NextErrorReport != NumErrorReports; --I) { - auto *Entry = &RingBuffer->Entries[I % AllocationRingBuffer::NumEntries]; + auto *Entry = getRingBufferEntry(RingBufferPtr, I % RingBufferElements); uptr EntryPtr = atomic_load_relaxed(&Entry->Ptr); if (!EntryPtr) continue; @@ -1421,8 +1471,66 @@ class Allocator { Primary.getStats(Str); Secondary.getStats(Str); Quarantine.getStats(Str); + TSDRegistry.getStats(Str); return Str->length(); } + + static typename AllocationRingBuffer::Entry * + getRingBufferEntry(char *RawRingBuffer, uptr N) { + return &reinterpret_cast( + &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + } + static const typename AllocationRingBuffer::Entry * + getRingBufferEntry(const char *RawRingBuffer, uptr N) { + return &reinterpret_cast( + &RawRingBuffer[sizeof(AllocationRingBuffer)])[N]; + } + + void mapAndInitializeRingBuffer() { + if (getFlags()->allocation_ring_buffer_size <= 0) + return; + u32 AllocationRingBufferSize = + static_cast(getFlags()->allocation_ring_buffer_size); + MemMapT MemMap; + MemMap.map( + /*Addr=*/0U, + roundUp(ringBufferSizeInBytes(AllocationRingBufferSize), + getPageSizeCached()), + "scudo:ring_buffer"); + RawRingBuffer = reinterpret_cast(MemMap.getBase()); + RawRingBufferMap = MemMap; + RingBufferElements = AllocationRingBufferSize; + static_assert(sizeof(AllocationRingBuffer) % + alignof(typename AllocationRingBuffer::Entry) == + 0, + "invalid alignment"); + } + + void unmapRingBuffer() { + auto *RingBuffer = getRingBuffer(); + if (RingBuffer != nullptr) { + RawRingBufferMap.unmap(RawRingBufferMap.getBase(), + RawRingBufferMap.getCapacity()); + } + RawRingBuffer = nullptr; + } + + static constexpr size_t ringBufferSizeInBytes(u32 RingBufferElements) { + return sizeof(AllocationRingBuffer) + + RingBufferElements * sizeof(typename AllocationRingBuffer::Entry); + } + + static constexpr size_t 
ringBufferElementsFromBytes(size_t Bytes) { + if (Bytes < sizeof(AllocationRingBuffer)) { + return 0; + } + return (Bytes - sizeof(AllocationRingBuffer)) / + sizeof(typename AllocationRingBuffer::Entry); + } + + inline AllocationRingBuffer *getRingBuffer() { + return reinterpret_cast(RawRingBuffer); + } }; } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/common.cpp b/third_party/llvm-scudo-standalone/common.cpp index 666f954..06e9306 100644 --- a/third_party/llvm-scudo-standalone/common.cpp +++ b/third_party/llvm-scudo-standalone/common.cpp @@ -21,18 +21,4 @@ uptr getPageSizeSlow() { return PageSizeCached; } -// Fatal internal map() or unmap() error (potentially OOM related). -void NORETURN dieOnMapUnmapError(uptr SizeIfOOM) { - char Error[128] = "Scudo ERROR: internal map or unmap failure\n"; - if (SizeIfOOM) { - formatString( - Error, sizeof(Error), - "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", - SizeIfOOM >> 10); - } - outputRaw(Error); - setAbortMessage(Error); - die(); -} - } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/common.h b/third_party/llvm-scudo-standalone/common.h index bc3dfec..ae45683 100644 --- a/third_party/llvm-scudo-standalone/common.h +++ b/third_party/llvm-scudo-standalone/common.h @@ -17,6 +17,7 @@ #include #include +#include namespace scudo { @@ -27,17 +28,31 @@ template inline Dest bit_cast(const Source &S) { return D; } -inline constexpr uptr roundUpTo(uptr X, uptr Boundary) { +inline constexpr bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } + +inline constexpr uptr roundUp(uptr X, uptr Boundary) { + DCHECK(isPowerOfTwo(Boundary)); return (X + Boundary - 1) & ~(Boundary - 1); } +inline constexpr uptr roundUpSlow(uptr X, uptr Boundary) { + return ((X + Boundary - 1) / Boundary) * Boundary; +} -inline constexpr uptr roundDownTo(uptr X, uptr Boundary) { +inline constexpr uptr roundDown(uptr X, uptr Boundary) { + DCHECK(isPowerOfTwo(Boundary)); return X & ~(Boundary - 1); } +inline constexpr uptr roundDownSlow(uptr X, uptr Boundary) { + return (X / Boundary) * Boundary; +} inline constexpr bool isAligned(uptr X, uptr Alignment) { + DCHECK(isPowerOfTwo(Alignment)); return (X & (Alignment - 1)) == 0; } +inline constexpr bool isAlignedSlow(uptr X, uptr Alignment) { + return X % Alignment == 0; +} template constexpr T Min(T A, T B) { return A < B ? A : B; } @@ -49,14 +64,12 @@ template void Swap(T &A, T &B) { B = Tmp; } -inline bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } - inline uptr getMostSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return SCUDO_WORDSIZE - 1U - static_cast(__builtin_clzl(X)); } -inline uptr roundUpToPowerOfTwo(uptr Size) { +inline uptr roundUpPowerOfTwo(uptr Size) { DCHECK(Size); if (isPowerOfTwo(Size)) return Size; @@ -99,19 +112,19 @@ template inline void shuffle(T *A, u32 N, u32 *RandState) { *RandState = State; } -// Hardware specific inlinable functions. 
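The renamed rounding helpers above keep the fast mask-based form, which is only valid for power-of-two boundaries (hence the new DCHECK(isPowerOfTwo(Boundary))), and add *Slow variants for arbitrary boundaries. A self-contained restatement of the arithmetic:

    #include <cstdint>

    constexpr uint64_t roundUpFast(uint64_t X, uint64_t B) { // B must be 2^k
      return (X + B - 1) & ~(B - 1);
    }
    constexpr uint64_t roundUpSlowly(uint64_t X, uint64_t B) { // any B > 0
      return ((X + B - 1) / B) * B;
    }

    static_assert(roundUpFast(13, 8) == 16, "mask form: power-of-two boundary");
    static_assert(roundUpSlowly(13, 12) == 24, "slow form: any boundary");
    static_assert(roundUpFast(13, 12) == 16,
                  "mask form misrounds for boundary 12, hence the DCHECK");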
+inline void computePercentage(uptr Numerator, uptr Denominator, uptr *Integral, + uptr *Fractional) { + constexpr uptr Digits = 100; + if (Denominator == 0) { + *Integral = 100; + *Fractional = 0; + return; + } -inline void yieldProcessor(u8 Count) { -#if defined(__i386__) || defined(__x86_64__) - __asm__ __volatile__("" ::: "memory"); - for (u8 I = 0; I < Count; I++) - __asm__ __volatile__("pause"); -#elif defined(__aarch64__) || defined(__arm__) - __asm__ __volatile__("" ::: "memory"); - for (u8 I = 0; I < Count; I++) - __asm__ __volatile__("yield"); -#endif - __asm__ __volatile__("" ::: "memory"); + *Integral = Numerator * Digits / Denominator; + *Fractional = + (((Numerator * Digits) % Denominator) * Digits + Denominator / 2) / + Denominator; } // Platform specific functions. @@ -119,9 +132,10 @@ inline void yieldProcessor(u8 Count) { extern uptr PageSizeCached; uptr getPageSizeSlow(); inline uptr getPageSizeCached() { - // Bionic uses a hardcoded value. - if (SCUDO_ANDROID) - return 4096U; +#if SCUDO_ANDROID && defined(PAGE_SIZE) + // Most Android builds have a build-time constant page size. + return PAGE_SIZE; +#endif if (LIKELY(PageSizeCached)) return PageSizeCached; return getPageSizeSlow(); @@ -133,6 +147,9 @@ u32 getNumberOfCPUs(); const char *getEnv(const char *Name); u64 getMonotonicTime(); +// Gets the time faster but with less accuracy. Can call getMonotonicTime +// if no fast version is available. +u64 getMonotonicTimeFast(); u32 getThreadID(); @@ -147,6 +164,7 @@ bool getRandom(void *Buffer, uptr Length, bool Blocking = false); #define MAP_NOACCESS (1U << 1) #define MAP_RESIZABLE (1U << 2) #define MAP_MEMTAG (1U << 3) +#define MAP_PRECOMMIT (1U << 4) // Our platform memory mapping use is restricted to 3 scenarios: // - reserve memory at a random address (MAP_NOACCESS); @@ -172,10 +190,6 @@ void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data = nullptr); -// Internal map & unmap fatal error. This must not call map(). SizeIfOOM shall -// hold the requested size on an out-of-memory error, 0 otherwise. -void NORETURN dieOnMapUnmapError(uptr SizeIfOOM = 0); - // Logging related functions. void setAbortMessage(const char *Message); @@ -197,6 +211,13 @@ enum class Option : u8 { MaxTSDsCount, // Number of usable TSDs for the shared registry. }; +enum class ReleaseToOS : u8 { + Normal, // Follow the normal rules for releasing pages to the OS + Force, // Force release pages to the OS, but avoid cases that take too long. + ForceAll, // Force release every page possible regardless of how long it will + // take. +}; + constexpr unsigned char PatternFillByte = 0xAB; enum FillContentsMode { diff --git a/third_party/llvm-scudo-standalone/condition_variable.h b/third_party/llvm-scudo-standalone/condition_variable.h new file mode 100644 index 0000000..4afebdc --- /dev/null +++ b/third_party/llvm-scudo-standalone/condition_variable.h @@ -0,0 +1,60 @@ +//===-- condition_variable.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_H_ +#define SCUDO_CONDITION_VARIABLE_H_ + +#include "condition_variable_base.h" + +#include "common.h" +#include "platform.h" + +#include "condition_variable_linux.h" + +namespace scudo { + +// A default implementation of default condition variable. It doesn't do a real +// `wait`, instead it spins a short amount of time only. +class ConditionVariableDummy + : public ConditionVariableBase { +public: + void notifyAllImpl(UNUSED HybridMutex &M) REQUIRES(M) {} + + void waitImpl(UNUSED HybridMutex &M) REQUIRES(M) { + M.unlock(); + + constexpr u32 SpinTimes = 64; + volatile u32 V = 0; + for (u32 I = 0; I < SpinTimes; ++I) { + u32 Tmp = V + 1; + V = Tmp; + } + + M.lock(); + } +}; + +template +struct ConditionVariableState { + static constexpr bool enabled() { return false; } + // This is only used for compilation purpose so that we won't end up having + // many conditional compilations. If you want to use `ConditionVariableDummy`, + // define `ConditionVariableT` in your allocator configuration. See + // allocator_config.h for more details. + using ConditionVariableT = ConditionVariableDummy; +}; + +template +struct ConditionVariableState { + static constexpr bool enabled() { return Config::UseConditionVariable; } + using ConditionVariableT = typename Config::ConditionVariableT; +}; + +} // namespace scudo + +#endif // SCUDO_CONDITION_VARIABLE_H_ diff --git a/third_party/llvm-scudo-standalone/condition_variable_base.h b/third_party/llvm-scudo-standalone/condition_variable_base.h new file mode 100644 index 0000000..416c327 --- /dev/null +++ b/third_party/llvm-scudo-standalone/condition_variable_base.h @@ -0,0 +1,56 @@ +//===-- condition_variable_base.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_BASE_H_ +#define SCUDO_CONDITION_VARIABLE_BASE_H_ + +#include "mutex.h" +#include "thread_annotations.h" + +namespace scudo { + +template class ConditionVariableBase { +public: + constexpr ConditionVariableBase() = default; + + void bindTestOnly(HybridMutex &Mutex) { +#if SCUDO_DEBUG + boundMutex = &Mutex; +#else + (void)Mutex; +#endif + } + + void notifyAll(HybridMutex &M) REQUIRES(M) { +#if SCUDO_DEBUG + CHECK_EQ(&M, boundMutex); +#endif + getDerived()->notifyAllImpl(M); + } + + void wait(HybridMutex &M) REQUIRES(M) { +#if SCUDO_DEBUG + CHECK_EQ(&M, boundMutex); +#endif + getDerived()->waitImpl(M); + } + +protected: + Derived *getDerived() { return static_cast(this); } + +#if SCUDO_DEBUG + // Because thread-safety analysis doesn't support pointer aliasing, we are not + // able to mark the proper annotations without false positive. Instead, we + // pass the lock and do the same-lock check separately. 
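As the comment above notes, an allocator configuration opts into a real condition variable by defining the relevant members; otherwise the detection falls back to ConditionVariableDummy. A hedged sketch of such a config fragment (member names follow the comment's reference to allocator_config.h, and ConditionVariableLinux is only available when SCUDO_LINUX is set):

    // Hypothetical configuration fragment; everything except the two members
    // shown here is elided.
    struct ExampleConfig {
      // ... size-class map, primary/secondary parameters ...
      static const bool UseConditionVariable = true;
      using ConditionVariableT = ConditionVariableLinux;
    };

    // With these members present, ConditionVariableState<ExampleConfig>::enabled()
    // returns true and the futex-backed implementation is selected; without
    // them the dummy, spin-only implementation is used.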
+ HybridMutex *boundMutex = nullptr; +#endif +}; + +} // namespace scudo + +#endif // SCUDO_CONDITION_VARIABLE_BASE_H_ diff --git a/third_party/llvm-scudo-standalone/condition_variable_linux.cpp b/third_party/llvm-scudo-standalone/condition_variable_linux.cpp new file mode 100644 index 0000000..e6d9bd1 --- /dev/null +++ b/third_party/llvm-scudo-standalone/condition_variable_linux.cpp @@ -0,0 +1,52 @@ +//===-- condition_variable_linux.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX + +#include "condition_variable_linux.h" + +#include "atomic_helpers.h" + +#include +#include +#include +#include + +namespace scudo { + +void ConditionVariableLinux::notifyAllImpl(UNUSED HybridMutex &M) { + const u32 V = atomic_load_relaxed(&Counter); + atomic_store_relaxed(&Counter, V + 1); + + // TODO(chiahungduan): Move the waiters from the futex waiting queue + // `Counter` to futex waiting queue `M` so that the awoken threads won't be + // blocked again due to locked `M` by current thread. + if (LastNotifyAll != V) { + syscall(SYS_futex, reinterpret_cast(&Counter), FUTEX_WAKE_PRIVATE, + INT_MAX, nullptr, nullptr, 0); + } + + LastNotifyAll = V + 1; +} + +void ConditionVariableLinux::waitImpl(HybridMutex &M) { + const u32 V = atomic_load_relaxed(&Counter) + 1; + atomic_store_relaxed(&Counter, V); + + // TODO: Use ScopedUnlock when it's supported. + M.unlock(); + syscall(SYS_futex, reinterpret_cast(&Counter), FUTEX_WAIT_PRIVATE, V, + nullptr, nullptr, 0); + M.lock(); +} + +} // namespace scudo + +#endif // SCUDO_LINUX diff --git a/third_party/llvm-scudo-standalone/condition_variable_linux.h b/third_party/llvm-scudo-standalone/condition_variable_linux.h new file mode 100644 index 0000000..cd07328 --- /dev/null +++ b/third_party/llvm-scudo-standalone/condition_variable_linux.h @@ -0,0 +1,38 @@ +//===-- condition_variable_linux.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CONDITION_VARIABLE_LINUX_H_ +#define SCUDO_CONDITION_VARIABLE_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX + +#include "atomic_helpers.h" +#include "condition_variable_base.h" +#include "thread_annotations.h" + +namespace scudo { + +class ConditionVariableLinux + : public ConditionVariableBase { +public: + void notifyAllImpl(HybridMutex &M) REQUIRES(M); + + void waitImpl(HybridMutex &M) REQUIRES(M); + +private: + u32 LastNotifyAll = 0; + atomic_u32 Counter = {}; +}; + +} // namespace scudo + +#endif // SCUDO_LINUX + +#endif // SCUDO_CONDITION_VARIABLE_LINUX_H_ diff --git a/third_party/llvm-scudo-standalone/crc32_hw.cpp b/third_party/llvm-scudo-standalone/crc32_hw.cpp index 62841ba..73f2ae0 100644 --- a/third_party/llvm-scudo-standalone/crc32_hw.cpp +++ b/third_party/llvm-scudo-standalone/crc32_hw.cpp @@ -10,10 +10,11 @@ namespace scudo { -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 computeHardwareCRC32(u32 Crc, uptr Data) { return static_cast(CRC32_INTRINSIC(Crc, Data)); } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || + // defined(__ARM_FEATURE_CRC32) } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/flags.cpp b/third_party/llvm-scudo-standalone/flags.cpp index de5153b..f498edf 100644 --- a/third_party/llvm-scudo-standalone/flags.cpp +++ b/third_party/llvm-scudo-standalone/flags.cpp @@ -68,6 +68,9 @@ void initFlags() { Parser.parseString(getCompileDefinitionScudoDefaultOptions()); Parser.parseString(getScudoDefaultOptions()); Parser.parseString(getEnv("SCUDO_OPTIONS")); + if (const char *V = getEnv("SCUDO_ALLOCATION_RING_BUFFER_SIZE")) { + Parser.parseStringPair("allocation_ring_buffer_size", V); + } } } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/flags.inc b/third_party/llvm-scudo-standalone/flags.inc index 690d889..f5a2bab 100644 --- a/third_party/llvm-scudo-standalone/flags.inc +++ b/third_party/llvm-scudo-standalone/flags.inc @@ -45,3 +45,7 @@ SCUDO_FLAG(bool, may_return_null, true, SCUDO_FLAG(int, release_to_os_interval_ms, SCUDO_ANDROID ? INT32_MIN : 5000, "Interval (in milliseconds) at which to attempt release of unused " "memory to the OS. Negative values disable the feature.") + +SCUDO_FLAG(int, allocation_ring_buffer_size, 32768, + "Entries to keep in the allocation ring buffer for scudo. 
" + "Values less or equal to zero disable the buffer.") diff --git a/third_party/llvm-scudo-standalone/flags_parser.cpp b/third_party/llvm-scudo-standalone/flags_parser.cpp index be39fcd..3d8c6f3 100644 --- a/third_party/llvm-scudo-standalone/flags_parser.cpp +++ b/third_party/llvm-scudo-standalone/flags_parser.cpp @@ -10,6 +10,8 @@ #include "common.h" #include "report.h" +#include +#include #include #include @@ -80,7 +82,7 @@ void FlagParser::parseFlag() { ++Pos; Value = Buffer + ValueStart; } - if (!runHandler(Name, Value)) + if (!runHandler(Name, Value, '=')) reportError("flag parsing failed."); } @@ -122,10 +124,16 @@ inline bool parseBool(const char *Value, bool *b) { return false; } -bool FlagParser::runHandler(const char *Name, const char *Value) { +void FlagParser::parseStringPair(const char *Name, const char *Value) { + if (!runHandler(Name, Value, '\0')) + reportError("flag parsing failed."); +} + +bool FlagParser::runHandler(const char *Name, const char *Value, + const char Sep) { for (u32 I = 0; I < NumberOfFlags; ++I) { const uptr Len = strlen(Flags[I].Name); - if (strncmp(Name, Flags[I].Name, Len) != 0 || Name[Len] != '=') + if (strncmp(Name, Flags[I].Name, Len) != 0 || Name[Len] != Sep) continue; bool Ok = false; switch (Flags[I].Type) { @@ -136,12 +144,18 @@ bool FlagParser::runHandler(const char *Name, const char *Value) { break; case FlagType::FT_int: char *ValueEnd; - *reinterpret_cast(Flags[I].Var) = - static_cast(strtol(Value, &ValueEnd, 10)); - Ok = - *ValueEnd == '"' || *ValueEnd == '\'' || isSeparatorOrNull(*ValueEnd); - if (!Ok) + errno = 0; + long V = strtol(Value, &ValueEnd, 10); + if (errno != 0 || // strtol failed (over or underflow) + V > INT_MAX || V < INT_MIN || // overflows integer + // contains unexpected characters + (*ValueEnd != '"' && *ValueEnd != '\'' && + !isSeparatorOrNull(*ValueEnd))) { reportInvalidFlag("int", Value); + break; + } + *reinterpret_cast(Flags[I].Var) = static_cast(V); + Ok = true; break; } return Ok; diff --git a/third_party/llvm-scudo-standalone/flags_parser.h b/third_party/llvm-scudo-standalone/flags_parser.h index ba832ad..ded496f 100644 --- a/third_party/llvm-scudo-standalone/flags_parser.h +++ b/third_party/llvm-scudo-standalone/flags_parser.h @@ -27,6 +27,7 @@ class FlagParser { void *Var); void parseString(const char *S); void printFlagDescriptions(); + void parseStringPair(const char *Name, const char *Value); private: static const u32 MaxFlags = 20; @@ -45,7 +46,7 @@ class FlagParser { void skipWhitespace(); void parseFlags(); void parseFlag(); - bool runHandler(const char *Name, const char *Value); + bool runHandler(const char *Name, const char *Value, char Sep); }; void reportUnrecognizedFlags(); diff --git a/third_party/llvm-scudo-standalone/fuchsia.cpp b/third_party/llvm-scudo-standalone/fuchsia.cpp index 3b473bc..0788c41 100644 --- a/third_party/llvm-scudo-standalone/fuchsia.cpp +++ b/third_party/llvm-scudo-standalone/fuchsia.cpp @@ -17,7 +17,9 @@ #include // for sync_mutex_t #include // for getenv() #include +#include #include +#include #include namespace scudo { @@ -30,6 +32,16 @@ void NORETURN die() { __builtin_trap(); } // with ZX_HANDLE_INVALID. 
static_assert(ZX_HANDLE_INVALID == 0, ""); +static void NORETURN dieOnError(zx_status_t Status, const char *FnName, + uptr Size) { + char Error[128]; + formatString(Error, sizeof(Error), + "SCUDO ERROR: %s failed with size %zuKB (%s)", FnName, + Size >> 10, zx_status_get_string(Status)); + outputRaw(Error); + die(); +} + static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { // Only scenario so far. DCHECK(Data); @@ -41,7 +53,7 @@ static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { Size, &Data->Vmar, &Data->VmarBase); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_allocate", Size); return nullptr; } return reinterpret_cast(Data->VmarBase); @@ -56,8 +68,9 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, if (Flags & MAP_NOACCESS) return allocateVmar(Size, Data, AllowNoMem); - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); - CHECK_NE(Vmar, ZX_HANDLE_INVALID); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? Data->Vmar + : _zx_vmar_root_self(); zx_status_t Status; zx_handle_t Vmo; @@ -71,7 +84,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_set_size(Vmo, VmoSize + Size); if (Status != ZX_OK) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmo_set_size", VmoSize + Size); return nullptr; } } else { @@ -79,7 +92,7 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, Status = _zx_vmo_create(Size, ZX_VMO_RESIZABLE, &Vmo); if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmo_create", Size); return nullptr; } _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, strlen(Name)); @@ -88,11 +101,24 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, uintptr_t P; zx_vm_option_t MapFlags = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_ALLOW_FAULTS; + if (Addr) + DCHECK(Data); const uint64_t Offset = Addr ? reinterpret_cast(Addr) - Data->VmarBase : 0; if (Offset) MapFlags |= ZX_VM_SPECIFIC; Status = _zx_vmar_map(Vmar, MapFlags, Offset, Vmo, VmoSize, Size, &P); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmar_map", Size); + return nullptr; + } + + if (Flags & MAP_PRECOMMIT) { + Status = _zx_vmar_op_range(Vmar, ZX_VMAR_OP_COMMIT, P, Size, + /*buffer=*/nullptr, /*buffer_size=*/0); + } + // No need to track the Vmo if we don't intend on resizing it. Close it. if (Flags & MAP_RESIZABLE) { DCHECK(Data); @@ -105,9 +131,10 @@ void *map(void *Addr, uptr Size, const char *Name, uptr Flags, } if (UNLIKELY(Status != ZX_OK)) { if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) - dieOnMapUnmapError(Status == ZX_ERR_NO_MEMORY ? Size : 0); + dieOnError(Status, "zx_vmar_op_range", Size); return nullptr; } + if (Data) Data->VmoSize += Size; @@ -123,11 +150,13 @@ void unmap(void *Addr, uptr Size, uptr Flags, MapPlatformData *Data) { CHECK_EQ(_zx_vmar_destroy(Vmar), ZX_OK); CHECK_EQ(_zx_handle_close(Vmar), ZX_OK); } else { - const zx_handle_t Vmar = Data ? Data->Vmar : _zx_vmar_root_self(); + const zx_handle_t Vmar = (Data && Data->Vmar != ZX_HANDLE_INVALID) + ? 
Data->Vmar + : _zx_vmar_root_self(); const zx_status_t Status = _zx_vmar_unmap(Vmar, reinterpret_cast(Addr), Size); if (UNLIKELY(Status != ZX_OK)) - dieOnMapUnmapError(); + dieOnError(Status, "zx_vmar_unmap", Size); } if (Data) { if (Data->Vmo != ZX_HANDLE_INVALID) @@ -142,12 +171,15 @@ void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags, (Flags & MAP_NOACCESS) ? 0 : (ZX_VM_PERM_READ | ZX_VM_PERM_WRITE); DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); - if (_zx_vmar_protect(Data->Vmar, Prot, Addr, Size) != ZX_OK) - dieOnMapUnmapError(); + const zx_status_t Status = _zx_vmar_protect(Data->Vmar, Prot, Addr, Size); + if (Status != ZX_OK) + dieOnError(Status, "zx_vmar_protect", Size); } void releasePagesToOS(UNUSED uptr BaseAddress, uptr Offset, uptr Size, MapPlatformData *Data) { + // TODO: DCHECK the BaseAddress is consistent with the data in + // MapPlatformData. DCHECK(Data); DCHECK_NE(Data->Vmar, ZX_HANDLE_INVALID); DCHECK_NE(Data->Vmo, ZX_HANDLE_INVALID); @@ -177,7 +209,10 @@ void HybridMutex::unlock() __TA_NO_THREAD_SAFETY_ANALYSIS { sync_mutex_unlock(&M); } +void HybridMutex::assertHeldImpl() __TA_NO_THREAD_SAFETY_ANALYSIS {} + u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } +u64 getMonotonicTimeFast() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } diff --git a/third_party/llvm-scudo-standalone/fuchsia.h b/third_party/llvm-scudo-standalone/fuchsia.h index d6993f8..c1dfd76 100644 --- a/third_party/llvm-scudo-standalone/fuchsia.h +++ b/third_party/llvm-scudo-standalone/fuchsia.h @@ -13,7 +13,8 @@ #if SCUDO_FUCHSIA -#include +#include +#include namespace scudo { diff --git a/third_party/llvm-scudo-standalone/fuzz/get_error_info_fuzzer.cpp b/third_party/llvm-scudo-standalone/fuzz/get_error_info_fuzzer.cpp index 078e44b..5b01ebe 100644 --- a/third_party/llvm-scudo-standalone/fuzz/get_error_info_fuzzer.cpp +++ b/third_party/llvm-scudo-standalone/fuzz/get_error_info_fuzzer.cpp @@ -46,15 +46,11 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *Data, size_t Size) { } std::string RingBufferBytes = FDP.ConsumeRemainingBytesAsString(); - std::vector RingBuffer(AllocatorT::getRingBufferSize(), 0); - for (size_t i = 0; i < RingBufferBytes.length() && i < RingBuffer.size(); - ++i) { - RingBuffer[i] = RingBufferBytes[i]; - } scudo_error_info ErrorInfo; AllocatorT::getErrorInfo(&ErrorInfo, FaultAddr, StackDepot.data(), - RegionInfo.data(), RingBuffer.data(), Memory, - MemoryTags, MemoryAddr, MemorySize); + RegionInfo.data(), RingBufferBytes.data(), + RingBufferBytes.size(), Memory, MemoryTags, + MemoryAddr, MemorySize); return 0; } diff --git a/third_party/llvm-scudo-standalone/include/scudo/interface.h b/third_party/llvm-scudo-standalone/include/scudo/interface.h index 9b9a846..a2dedea 100644 --- a/third_party/llvm-scudo-standalone/include/scudo/interface.h +++ b/third_party/llvm-scudo-standalone/include/scudo/interface.h @@ -14,13 +14,25 @@ extern "C" { -__attribute__((weak)) const char *__scudo_default_options(); +__attribute__((weak)) const char *__scudo_default_options(void); // Post-allocation & pre-deallocation hooks. -// They must be thread-safe and not use heap related functions. __attribute__((weak)) void __scudo_allocate_hook(void *ptr, size_t size); __attribute__((weak)) void __scudo_deallocate_hook(void *ptr); +// `realloc` involves both deallocation and allocation but they are not reported +// atomically. 
In one specific case which may keep taking a snapshot right in +// the middle of `realloc` reporting the deallocation and allocation, it may +// confuse the user by missing memory from `realloc`. To alleviate that case, +// define the two `realloc` hooks to get the knowledge of the bundled hook +// calls. These hooks are optional and should only be used when a hooks user +// wants to track reallocs more closely. +// +// See more details in the comment of `realloc` in wrapper_c.inc. +__attribute__((weak)) void +__scudo_realloc_allocate_hook(void *old_ptr, void *new_ptr, size_t size); +__attribute__((weak)) void __scudo_realloc_deallocate_hook(void *old_ptr); + void __scudo_print_stats(void); typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); @@ -73,7 +85,8 @@ typedef void (*iterate_callback)(uintptr_t base, size_t size, void *arg); // pointer. void __scudo_get_error_info(struct scudo_error_info *error_info, uintptr_t fault_addr, const char *stack_depot, - const char *region_info, const char *ring_buffer, + size_t stack_depot_size, const char *region_info, + const char *ring_buffer, size_t ring_buffer_size, const char *memory, const char *memory_tags, uintptr_t memory_addr, size_t memory_size); @@ -101,14 +114,14 @@ struct scudo_error_info { struct scudo_error_report reports[3]; }; -const char *__scudo_get_stack_depot_addr(); -size_t __scudo_get_stack_depot_size(); +const char *__scudo_get_stack_depot_addr(void); +size_t __scudo_get_stack_depot_size(void); -const char *__scudo_get_region_info_addr(); -size_t __scudo_get_region_info_size(); +const char *__scudo_get_region_info_addr(void); +size_t __scudo_get_region_info_size(void); -const char *__scudo_get_ring_buffer_addr(); -size_t __scudo_get_ring_buffer_size(); +const char *__scudo_get_ring_buffer_addr(void); +size_t __scudo_get_ring_buffer_size(void); #ifndef M_DECAY_TIME #define M_DECAY_TIME -100 @@ -118,6 +131,10 @@ size_t __scudo_get_ring_buffer_size(); #define M_PURGE -101 #endif +#ifndef M_PURGE_ALL +#define M_PURGE_ALL -104 +#endif + // Tune the allocator's choice of memory tags to make it more likely that // a certain class of memory errors will be detected. The value argument should // be one of the M_MEMTAG_TUNING_* constants below. @@ -155,6 +172,11 @@ size_t __scudo_get_ring_buffer_size(); #define M_MEMTAG_TUNING_UAF 1 #endif +// Print internal stats to the log. +#ifndef M_LOG_STATS +#define M_LOG_STATS -205 +#endif + } // extern "C" #endif // SCUDO_INTERFACE_H_ diff --git a/third_party/llvm-scudo-standalone/internal_defs.h b/third_party/llvm-scudo-standalone/internal_defs.h index c9ffad1..27c6b45 100644 --- a/third_party/llvm-scudo-standalone/internal_defs.h +++ b/third_party/llvm-scudo-standalone/internal_defs.h @@ -78,16 +78,16 @@ namespace scudo { -typedef unsigned long uptr; -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; -typedef signed long sptr; -typedef signed char s8; -typedef signed short s16; -typedef signed int s32; -typedef signed long long s64; +typedef uintptr_t uptr; +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef intptr_t sptr; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; // The following two functions have platform specific implementations. 
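The paired realloc hooks described above let a hooks consumer treat the deallocate/allocate halves of a realloc as a single event. A hedged sketch of a consumer, assuming a scudo build with hooks compiled in; the logging stands in for a real allocation tracker:

    #include <cstddef>
    #include <cstdio>

    // Overrides for the weak symbols declared above. Correlating old_ptr
    // across the two calls lets a tracker treat the realloc as a move rather
    // than a free followed by an unrelated allocation.
    extern "C" void __scudo_realloc_deallocate_hook(void *old_ptr) {
      std::printf("realloc begin: old=%p\n", old_ptr);
    }
    extern "C" void __scudo_realloc_allocate_hook(void *old_ptr, void *new_ptr,
                                                  size_t size) {
      std::printf("realloc end:   old=%p new=%p size=%zu\n", old_ptr, new_ptr,
                  size);
    }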
void outputRaw(const char *Buffer); @@ -133,25 +133,25 @@ void NORETURN reportCheckFailed(const char *File, int Line, #else #define DCHECK(A) \ do { \ - } while (false) + } while (false && (A)) #define DCHECK_EQ(A, B) \ do { \ - } while (false) + } while (false && (A) == (B)) #define DCHECK_NE(A, B) \ do { \ - } while (false) + } while (false && (A) != (B)) #define DCHECK_LT(A, B) \ do { \ - } while (false) + } while (false && (A) < (B)) #define DCHECK_LE(A, B) \ do { \ - } while (false) + } while (false && (A) <= (B)) #define DCHECK_GT(A, B) \ do { \ - } while (false) + } while (false && (A) > (B)) #define DCHECK_GE(A, B) \ do { \ - } while (false) + } while (false && (A) >= (B)) #endif // The superfluous die() call effectively makes this macro NORETURN. diff --git a/third_party/llvm-scudo-standalone/linux.cpp b/third_party/llvm-scudo-standalone/linux.cpp index c77c1bb..2746951 100644 --- a/third_party/llvm-scudo-standalone/linux.cpp +++ b/third_party/llvm-scudo-standalone/linux.cpp @@ -11,14 +11,17 @@ #if SCUDO_LINUX #include "common.h" +#include "internal_defs.h" #include "linux.h" #include "mutex.h" +#include "report_linux.h" #include "string_utils.h" #include #include #include #include +#include #include #include #include @@ -41,6 +44,7 @@ uptr getPageSize() { return static_cast(sysconf(_SC_PAGESIZE)); } void NORETURN die() { abort(); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, UNUSED MapPlatformData *Data) { int MmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; @@ -63,7 +67,7 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0); if (P == MAP_FAILED) { if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) - dieOnMapUnmapError(errno == ENOMEM ? Size : 0); + reportMapError(errno == ENOMEM ? Size : 0); return nullptr; } #if SCUDO_ANDROID @@ -73,19 +77,22 @@ void *map(void *Addr, uptr Size, UNUSED const char *Name, uptr Flags, return P; } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void unmap(void *Addr, uptr Size, UNUSED uptr Flags, UNUSED MapPlatformData *Data) { if (munmap(Addr, Size) != 0) - dieOnMapUnmapError(); + reportUnmapError(reinterpret_cast(Addr), Size); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void setMemoryPermission(uptr Addr, uptr Size, uptr Flags, UNUSED MapPlatformData *Data) { int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); if (mprotect(reinterpret_cast(Addr), Size, Prot) != 0) - dieOnMapUnmapError(); + reportProtectError(Addr, Size, Prot); } +// TODO: Will be deprecated. Use the interfaces in MemMapLinux instead. void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size, UNUSED MapPlatformData *Data) { void *Addr = reinterpret_cast(BaseAddress + Offset); @@ -102,12 +109,14 @@ enum State : u32 { Unlocked = 0, Locked = 1, Sleeping = 2 }; } bool HybridMutex::tryLock() { - return atomic_compare_exchange(&M, Unlocked, Locked) == Unlocked; + return atomic_compare_exchange_strong(&M, Unlocked, Locked, + memory_order_acquire) == Unlocked; } // The following is based on https://akkadia.org/drepper/futex.pdf. 
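The HybridMutex slow path that follows implements the classic three-state futex mutex from the Drepper paper cited above (Unlocked, Locked, Sleeping). A compact model of that protocol, written against the raw futex syscall rather than scudo's own types:

    #include <atomic>
    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // 0 = unlocked, 1 = locked with no waiters, 2 = locked, waiters possible.
    static std::atomic<unsigned> M{0};

    static void futexWait(unsigned Val) {
      syscall(SYS_futex, static_cast<void *>(&M), FUTEX_WAIT_PRIVATE, Val,
              nullptr, nullptr, 0);
    }
    static void futexWakeOne() {
      syscall(SYS_futex, static_cast<void *>(&M), FUTEX_WAKE_PRIVATE, 1,
              nullptr, nullptr, 0);
    }

    void lock() {
      unsigned C = 0;
      if (M.compare_exchange_strong(C, 1, std::memory_order_acquire))
        return; // uncontended fast path
      // Contended: advertise a sleeper (state 2) before waiting, and keep
      // re-acquiring with 2 so that unlock() knows it must issue a wake.
      if (C != 2)
        C = M.exchange(2, std::memory_order_acquire);
      while (C != 0) {
        futexWait(2);
        C = M.exchange(2, std::memory_order_acquire);
      }
    }

    void unlock() {
      if (M.exchange(0, std::memory_order_release) == 2)
        futexWakeOne(); // someone may be sleeping on the futex
    }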
void HybridMutex::lockSlow() { - u32 V = atomic_compare_exchange(&M, Unlocked, Locked); + u32 V = atomic_compare_exchange_strong(&M, Unlocked, Locked, + memory_order_acquire); if (V == Unlocked) return; if (V != Sleeping) @@ -127,6 +136,10 @@ void HybridMutex::unlock() { } } +void HybridMutex::assertHeldImpl() { + CHECK(atomic_load(&M, memory_order_acquire) != Unlocked); +} + u64 getMonotonicTime() { timespec TS; clock_gettime(CLOCK_MONOTONIC, &TS); @@ -134,6 +147,17 @@ u64 getMonotonicTime() { static_cast(TS.tv_nsec); } +u64 getMonotonicTimeFast() { +#if defined(CLOCK_MONOTONIC_COARSE) + timespec TS; + clock_gettime(CLOCK_MONOTONIC_COARSE, &TS); + return static_cast(TS.tv_sec) * (1000ULL * 1000 * 1000) + + static_cast(TS.tv_nsec); +#else + return getMonotonicTime(); +#endif +} + u32 getNumberOfCPUs() { cpu_set_t CPUs; // sched_getaffinity can fail for a variety of legitimate reasons (lack of diff --git a/third_party/llvm-scudo-standalone/list.h b/third_party/llvm-scudo-standalone/list.h index 1ac93c2..0137667 100644 --- a/third_party/llvm-scudo-standalone/list.h +++ b/third_party/llvm-scudo-standalone/list.h @@ -110,6 +110,18 @@ template struct SinglyLinkedList : public IntrusiveList { Size--; } + // Insert X next to Prev + void insert(T *Prev, T *X) { + DCHECK(!empty()); + DCHECK_NE(Prev, nullptr); + DCHECK_NE(X, nullptr); + X->Next = Prev->Next; + Prev->Next = X; + if (Last == Prev) + Last = X; + ++Size; + } + void extract(T *Prev, T *X) { DCHECK(!empty()); DCHECK_NE(Prev, nullptr); diff --git a/third_party/llvm-scudo-standalone/local_cache.h b/third_party/llvm-scudo-standalone/local_cache.h index f46645f..46d6aff 100644 --- a/third_party/llvm-scudo-standalone/local_cache.h +++ b/third_party/llvm-scudo-standalone/local_cache.h @@ -10,8 +10,11 @@ #define SCUDO_LOCAL_CACHE_H_ #include "internal_defs.h" +#include "list.h" +#include "platform.h" #include "report.h" #include "stats.h" +#include "string_utils.h" namespace scudo { @@ -19,42 +22,13 @@ template struct SizeClassAllocatorLocalCache { typedef typename SizeClassAllocator::SizeClassMap SizeClassMap; typedef typename SizeClassAllocator::CompactPtrT CompactPtrT; - struct TransferBatch { - static const u32 MaxNumCached = SizeClassMap::MaxNumCachedHint; - void setFromArray(CompactPtrT *Array, u32 N) { - DCHECK_LE(N, MaxNumCached); - Count = N; - memcpy(Batch, Array, sizeof(Batch[0]) * Count); - } - void clear() { Count = 0; } - void add(CompactPtrT P) { - DCHECK_LT(Count, MaxNumCached); - Batch[Count++] = P; - } - void copyToArray(CompactPtrT *Array) const { - memcpy(Array, Batch, sizeof(Batch[0]) * Count); - } - u32 getCount() const { return Count; } - CompactPtrT get(u32 I) const { - DCHECK_LE(I, Count); - return Batch[I]; - } - static u32 getMaxCached(uptr Size) { - return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size)); - } - TransferBatch *Next; - - private: - u32 Count; - CompactPtrT Batch[MaxNumCached]; - }; - void init(GlobalStats *S, SizeClassAllocator *A) { DCHECK(isEmpty()); Stats.init(); if (LIKELY(S)) S->link(&Stats); Allocator = A; + initCache(); } void destroy(GlobalStats *S) { @@ -67,7 +41,9 @@ template struct SizeClassAllocatorLocalCache { DCHECK_LT(ClassId, NumClasses); PerClass *C = &PerClassArray[ClassId]; if (C->Count == 0) { - if (UNLIKELY(!refill(C, ClassId))) + // Refill half of the number of max cached. 
+ DCHECK_GT(C->MaxCount / 2, 0U); + if (UNLIKELY(!refill(C, ClassId, C->MaxCount / 2))) return nullptr; DCHECK_GT(C->Count, 0); } @@ -81,13 +57,13 @@ template struct SizeClassAllocatorLocalCache { return Allocator->decompactPtr(ClassId, CompactP); } - void deallocate(uptr ClassId, void *P) { + bool deallocate(uptr ClassId, void *P) { CHECK_LT(ClassId, NumClasses); PerClass *C = &PerClassArray[ClassId]; - // We still have to initialize the cache in the event that the first heap - // operation in a thread is a deallocation. - initCacheMaybe(C); - if (C->Count == C->MaxCount) + + // If the cache is full, drain half of blocks back to the main allocator. + const bool NeedToDrainCache = C->Count == C->MaxCount; + if (NeedToDrainCache) drain(C, ClassId); // See comment in allocate() about memory accesses. const uptr ClassSize = C->ClassSize; @@ -95,6 +71,8 @@ template struct SizeClassAllocatorLocalCache { Allocator->compactPtr(ClassId, reinterpret_cast(P)); Stats.sub(StatAllocated, ClassSize); Stats.add(StatFree, ClassSize); + + return NeedToDrainCache; } bool isEmpty() const { @@ -105,7 +83,7 @@ template struct SizeClassAllocatorLocalCache { } void drain() { - // Drain BatchClassId last as createBatch can refill it. + // Drain BatchClassId last as it may be needed while draining normal blocks. for (uptr I = 0; I < NumClasses; ++I) { if (I == BatchClassId) continue; @@ -117,40 +95,62 @@ template struct SizeClassAllocatorLocalCache { DCHECK(isEmpty()); } - TransferBatch *createBatch(uptr ClassId, void *B) { - if (ClassId != BatchClassId) - B = allocate(BatchClassId); - return reinterpret_cast(B); + void *getBatchClassBlock() { + void *B = allocate(BatchClassId); + if (UNLIKELY(!B)) + reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); + return B; } LocalStats &getStats() { return Stats; } + void getStats(ScopedString *Str) { + bool EmptyCache = true; + for (uptr I = 0; I < NumClasses; ++I) { + if (PerClassArray[I].Count == 0) + continue; + + EmptyCache = false; + // The size of BatchClass is set to 0 intentionally. See the comment in + // initCache() for more details. + const uptr ClassSize = I == BatchClassId + ? SizeClassAllocator::getSizeByClassId(I) + : PerClassArray[I].ClassSize; + // Note that the string utils don't support printing u16 thus we cast it + // to a common use type uptr. + Str->append(" %02zu (%6zu): cached: %4zu max: %4zu\n", I, ClassSize, + static_cast(PerClassArray[I].Count), + static_cast(PerClassArray[I].MaxCount)); + } + + if (EmptyCache) + Str->append(" No block is cached.\n"); + } + + static u16 getMaxCached(uptr Size) { + return Min(SizeClassMap::MaxNumCachedHint, + SizeClassMap::getMaxCachedHint(Size)); + } + private: static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr BatchClassId = SizeClassMap::BatchClassId; - struct PerClass { - u32 Count; - u32 MaxCount; + struct alignas(SCUDO_CACHE_LINE_SIZE) PerClass { + u16 Count; + u16 MaxCount; // Note: ClassSize is zero for the transfer batch. 
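The cache above no longer moves whole TransferBatch objects: allocate() refills at most half of the per-class capacity through popBlocks(), and deallocate() drains half when the cache is full and reports that it did so, which lets the caller attempt a page release. A simplified model of the counting; real block storage, the u16 widths, and statistics are elided:

    struct ModelCache {
      unsigned Count = 0, MaxCount = 8;

      bool allocate() {
        if (Count == 0) {
          // The real code calls popBlocks(..., MaxCount / 2) on the Primary.
          unsigned Refilled = MaxCount / 2;
          if (Refilled == 0)
            return false; // backend out of memory
          Count += Refilled;
        }
        --Count;
        return true;
      }

      bool deallocate() {
        const bool NeedToDrain = (Count == MaxCount);
        if (NeedToDrain)
          Count -= MaxCount / 2; // pushBlocks(..., MaxCount / 2) to the Primary
        ++Count;
        return NeedToDrain; // caller may then call Primary.tryReleaseToOS()
      }
    };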
uptr ClassSize; - CompactPtrT Chunks[2 * TransferBatch::MaxNumCached]; + CompactPtrT Chunks[2 * SizeClassMap::MaxNumCachedHint]; }; PerClass PerClassArray[NumClasses] = {}; LocalStats Stats; SizeClassAllocator *Allocator = nullptr; - ALWAYS_INLINE void initCacheMaybe(PerClass *C) { - if (LIKELY(C->MaxCount)) - return; - initCache(); - DCHECK_NE(C->MaxCount, 0U); - } - NOINLINE void initCache() { for (uptr I = 0; I < NumClasses; I++) { PerClass *P = &PerClassArray[I]; const uptr Size = SizeClassAllocator::getSizeByClassId(I); - P->MaxCount = 2 * TransferBatch::getMaxCached(Size); + P->MaxCount = static_cast(2 * getMaxCached(Size)); if (I != BatchClassId) { P->ClassSize = Size; } else { @@ -166,30 +166,21 @@ template struct SizeClassAllocatorLocalCache { deallocate(BatchClassId, B); } - NOINLINE bool refill(PerClass *C, uptr ClassId) { - initCacheMaybe(C); - TransferBatch *B = Allocator->popBatch(this, ClassId); - if (UNLIKELY(!B)) - return false; - DCHECK_GT(B->getCount(), 0); - C->Count = B->getCount(); - B->copyToArray(C->Chunks); - B->clear(); - destroyBatch(ClassId, B); - return true; + NOINLINE bool refill(PerClass *C, uptr ClassId, u16 MaxRefill) { + const u16 NumBlocksRefilled = + Allocator->popBlocks(this, ClassId, C->Chunks, MaxRefill); + DCHECK_LE(NumBlocksRefilled, MaxRefill); + C->Count = static_cast(C->Count + NumBlocksRefilled); + return NumBlocksRefilled != 0; } NOINLINE void drain(PerClass *C, uptr ClassId) { - const u32 Count = Min(C->MaxCount / 2, C->Count); - TransferBatch *B = - createBatch(ClassId, Allocator->decompactPtr(ClassId, C->Chunks[0])); - if (UNLIKELY(!B)) - reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId)); - B->setFromArray(&C->Chunks[0], Count); - C->Count -= Count; - for (uptr I = 0; I < C->Count; I++) + const u16 Count = Min(static_cast(C->MaxCount / 2), C->Count); + Allocator->pushBlocks(this, ClassId, &C->Chunks[0], Count); + // u16 will be promoted to int by arithmetic type conversion. + C->Count = static_cast(C->Count - Count); + for (u16 I = 0; I < C->Count; I++) C->Chunks[I] = C->Chunks[I + Count]; - Allocator->pushBatch(ClassId, B); } }; diff --git a/third_party/llvm-scudo-standalone/mem_map.cpp b/third_party/llvm-scudo-standalone/mem_map.cpp new file mode 100644 index 0000000..115cc34 --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map.cpp @@ -0,0 +1,84 @@ +//===-- mem_map.cpp ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mem_map.h" + +#include "common.h" + +namespace scudo { + +bool MemMapDefault::mapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *MappedAddr = + ::scudo::map(reinterpret_cast(Addr), Size, Name, Flags, &Data); + if (MappedAddr == nullptr) + return false; + Base = reinterpret_cast(MappedAddr); + MappedBase = Base; + Capacity = Size; + return true; +} + +void MemMapDefault::unmapImpl(uptr Addr, uptr Size) { + if (Size == Capacity) { + Base = MappedBase = Capacity = 0; + } else { + if (Base == Addr) { + Base = Addr + Size; + MappedBase = MappedBase == 0 ? 
Base : Max(MappedBase, Base); + } + Capacity -= Size; + } + + ::scudo::unmap(reinterpret_cast(Addr), Size, UNMAP_ALL, &Data); +} + +bool MemMapDefault::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *RemappedPtr = + ::scudo::map(reinterpret_cast(Addr), Size, Name, Flags, &Data); + const uptr RemappedAddr = reinterpret_cast(RemappedPtr); + MappedBase = MappedBase == 0 ? RemappedAddr : Min(MappedBase, RemappedAddr); + return RemappedAddr == Addr; +} + +void MemMapDefault::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + DCHECK_NE(MappedBase, 0U); + DCHECK_GE(From, MappedBase); + return ::scudo::releasePagesToOS(MappedBase, From - MappedBase, Size, &Data); +} + +void MemMapDefault::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + return ::scudo::setMemoryPermission(Addr, Size, Flags); +} + +void ReservedMemoryDefault::releaseImpl() { + ::scudo::unmap(reinterpret_cast(Base), Capacity, UNMAP_ALL, &Data); +} + +bool ReservedMemoryDefault::createImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *Reserved = ::scudo::map(reinterpret_cast(Addr), Size, Name, + Flags | MAP_NOACCESS, &Data); + if (Reserved == nullptr) + return false; + + Base = reinterpret_cast(Reserved); + Capacity = Size; + + return true; +} + +ReservedMemoryDefault::MemMapT ReservedMemoryDefault::dispatchImpl(uptr Addr, + uptr Size) { + ReservedMemoryDefault::MemMapT NewMap(Addr, Size); + NewMap.setMapPlatformData(Data); + return NewMap; +} + +} // namespace scudo diff --git a/third_party/llvm-scudo-standalone/mem_map.h b/third_party/llvm-scudo-standalone/mem_map.h new file mode 100644 index 0000000..b92216c --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map.h @@ -0,0 +1,92 @@ +//===-- mem_map.h -----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_H_ +#define SCUDO_MEM_MAP_H_ + +#include "mem_map_base.h" + +#include "common.h" +#include "internal_defs.h" + +// TODO: This is only used for `MapPlatformData`. Remove these includes when we +// have all three platform specific `MemMap` and `ReservedMemory` +// implementations. +#include "fuchsia.h" +#include "linux.h" +#include "trusty.h" + +#include "mem_map_fuchsia.h" +#include "mem_map_linux.h" + +namespace scudo { + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class MemMapDefault final : public MemMapBase { +public: + constexpr MemMapDefault() = default; + MemMapDefault(uptr Base, uptr Capacity) : Base(Base), Capacity(Capacity) {} + + // Impls for base functions. 
+ bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + + void setMapPlatformData(MapPlatformData &NewData) { Data = NewData; } + +private: + uptr Base = 0; + uptr Capacity = 0; + uptr MappedBase = 0; + MapPlatformData Data = {}; +}; + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class ReservedMemoryDefault final + : public ReservedMemory { +public: + constexpr ReservedMemoryDefault() = default; + + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + +private: + uptr Base = 0; + uptr Capacity = 0; + MapPlatformData Data = {}; +}; + +#if SCUDO_LINUX +using ReservedMemoryT = ReservedMemoryLinux; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_FUCHSIA +using ReservedMemoryT = ReservedMemoryFuchsia; +using MemMapT = ReservedMemoryT::MemMapT; +#elif SCUDO_TRUSTY +using ReservedMemoryT = ReservedMemoryDefault; +using MemMapT = ReservedMemoryT::MemMapT; +#else +#error \ + "Unsupported platform, please implement the ReservedMemory for your platform!" +#endif + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_H_ diff --git a/third_party/llvm-scudo-standalone/mem_map_base.h b/third_party/llvm-scudo-standalone/mem_map_base.h new file mode 100644 index 0000000..99ab0cb --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map_base.h @@ -0,0 +1,129 @@ +//===-- mem_map_base.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_BASE_H_ +#define SCUDO_MEM_MAP_BASE_H_ + +#include "common.h" + +namespace scudo { + +// In Scudo, every memory operation will be fulfilled through a +// platform-specific `MemMap` instance. The essential APIs are listed in the +// `MemMapBase` below. This is implemented in CRTP, so for each implementation, +// it has to implement all of the 'Impl' named functions. +template class MemMapBase { +public: + constexpr MemMapBase() = default; + + // This is used to map a new set of contiguous pages. Note that the `Addr` is + // only a suggestion to the system. + bool map(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isAllocated()); + return invokeImpl(&Derived::mapImpl, Addr, Size, Name, Flags); + } + + // This is used to unmap partial/full pages from the beginning or the end. + // I.e., the result pages are expected to be still contiguous. + void unmap(uptr Addr, uptr Size) { + DCHECK(isAllocated()); + DCHECK((Addr == getBase()) || (Addr + Size == getBase() + getCapacity())); + invokeImpl(&Derived::unmapImpl, Addr, Size); + } + + // This is used to remap a mapped range (either from map() or dispatched from + // ReservedMemory). 
For example, we have reserved several pages and then we + // want to remap them with different accessibility. + bool remap(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::remapImpl, Addr, Size, Name, Flags); + } + + // This is used to update the pages' access permission. For example, mark + // pages as no read/write permission. + void setMemoryPermission(uptr Addr, uptr Size, uptr Flags) { + DCHECK(isAllocated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::setMemoryPermissionImpl, Addr, Size, Flags); + } + + // Suggest releasing a set of contiguous physical pages back to the OS. Note + // that only physical pages are supposed to be released. Any release of + // virtual pages may lead to undefined behavior. + void releasePagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) && (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releasePagesToOSImpl, From, Size); + } + // This is similar to the above one except that any subsequent access to the + // released pages will return with zero-filled pages. + void releaseAndZeroPagesToOS(uptr From, uptr Size) { + DCHECK(isAllocated()); + DCHECK((From >= getBase()) && (From + Size <= getBase() + getCapacity())); + invokeImpl(&Derived::releaseAndZeroPagesToOSImpl, From, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isAllocated() { return getBase() != 0U; } + +protected: + template + R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) { + return (static_cast(this)->*MemFn)(args...); + } +}; + +// `ReservedMemory` is a special memory handle which can be viewed as a page +// allocator. `ReservedMemory` will reserve a contiguous pages and the later +// page request can be fulfilled at the designated address. This is used when +// we want to ensure the virtual address of the MemMap will be in a known range. +// This is implemented in CRTP, so for each +// implementation, it has to implement all of the 'Impl' named functions. +template class ReservedMemory { +public: + using MemMapT = MemMapTy; + constexpr ReservedMemory() = default; + + // Reserve a chunk of memory at a suggested address. + bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) { + DCHECK(!isCreated()); + return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags); + } + + // Release the entire reserved memory. + void release() { + DCHECK(isCreated()); + invokeImpl(&Derived::releaseImpl); + } + + // Dispatch a sub-range of reserved memory. Note that any fragmentation of + // the reserved pages is managed by each implementation. + MemMapT dispatch(uptr Addr, uptr Size) { + DCHECK(isCreated()); + DCHECK((Addr >= getBase()) && (Addr + Size <= getBase() + getCapacity())); + return invokeImpl(&Derived::dispatchImpl, Addr, Size); + } + + uptr getBase() { return invokeImpl(&Derived::getBaseImpl); } + uptr getCapacity() { return invokeImpl(&Derived::getCapacityImpl); } + + bool isCreated() { return getBase() != 0U; } + +protected: + template + R invokeImpl(R (Derived::*MemFn)(Args...), Args... 
args) { + return (static_cast(this)->*MemFn)(args...); + } +}; + +} // namespace scudo + +#endif // SCUDO_MEM_MAP_BASE_H_ diff --git a/third_party/llvm-scudo-standalone/mem_map_fuchsia.cpp b/third_party/llvm-scudo-standalone/mem_map_fuchsia.cpp new file mode 100644 index 0000000..0566ab0 --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map_fuchsia.cpp @@ -0,0 +1,252 @@ +//===-- mem_map_fuchsia.cpp -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mem_map_fuchsia.h" + +#include "atomic_helpers.h" +#include "common.h" +#include "string_utils.h" + +#if SCUDO_FUCHSIA + +#include +#include +#include + +namespace scudo { + +static void NORETURN dieOnError(zx_status_t Status, const char *FnName, + uptr Size) { + char Error[128]; + formatString(Error, sizeof(Error), + "SCUDO ERROR: %s failed with size %zuKB (%s)", FnName, + Size >> 10, _zx_status_get_string(Status)); + outputRaw(Error); + die(); +} + +static void setVmoName(zx_handle_t Vmo, const char *Name) { + size_t Len = strlen(Name); + DCHECK_LT(Len, ZX_MAX_NAME_LEN); + zx_status_t Status = _zx_object_set_property(Vmo, ZX_PROP_NAME, Name, Len); + CHECK_EQ(Status, ZX_OK); +} + +// Returns the (cached) base address of the root VMAR. +static uptr getRootVmarBase() { + static atomic_uptr CachedResult = {0}; + + uptr Result = atomic_load(&CachedResult, memory_order_acquire); + if (UNLIKELY(!Result)) { + zx_info_vmar_t VmarInfo; + zx_status_t Status = + _zx_object_get_info(_zx_vmar_root_self(), ZX_INFO_VMAR, &VmarInfo, + sizeof(VmarInfo), nullptr, nullptr); + CHECK_EQ(Status, ZX_OK); + CHECK_NE(VmarInfo.base, 0); + + atomic_store(&CachedResult, VmarInfo.base, memory_order_release); + Result = VmarInfo.base; + } + + return Result; +} + +// Lazily creates and then always returns the same zero-sized VMO. +static zx_handle_t getPlaceholderVmo() { + static atomic_u32 StoredVmo = {ZX_HANDLE_INVALID}; + + zx_handle_t Vmo = atomic_load(&StoredVmo, memory_order_acquire); + if (UNLIKELY(Vmo == ZX_HANDLE_INVALID)) { + // Create a zero-sized placeholder VMO. + zx_status_t Status = _zx_vmo_create(0, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) + dieOnError(Status, "zx_vmo_create", 0); + + setVmoName(Vmo, "scudo:reserved"); + + // Atomically store its handle. If some other thread wins the race, use its + // handle and discard ours. + zx_handle_t OldValue = atomic_compare_exchange_strong( + &StoredVmo, ZX_HANDLE_INVALID, Vmo, memory_order_acq_rel); + if (UNLIKELY(OldValue != ZX_HANDLE_INVALID)) { + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + + Vmo = OldValue; + } + } + + return Vmo; +} + +MemMapFuchsia::MemMapFuchsia(uptr Base, uptr Capacity) + : MapAddr(Base), WindowBase(Base), WindowSize(Capacity) { + // Create the VMO. + zx_status_t Status = _zx_vmo_create(Capacity, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) + dieOnError(Status, "zx_vmo_create", Capacity); +} + +bool MemMapFuchsia::mapImpl(UNUSED uptr Addr, uptr Size, const char *Name, + uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + const bool PreCommit = !!(Flags & MAP_PRECOMMIT); + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + // Create the VMO. 
+ zx_status_t Status = _zx_vmo_create(Size, 0, &Vmo); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmo_create", Size); + return false; + } + + if (Name != nullptr) + setVmoName(Vmo, Name); + + // Map it. + zx_vm_option_t MapFlags = ZX_VM_ALLOW_FAULTS; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + Status = + _zx_vmar_map(_zx_vmar_root_self(), MapFlags, 0, Vmo, 0, Size, &MapAddr); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmar_map", Size); + + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + + MapAddr = 0; + Vmo = ZX_HANDLE_INVALID; + return false; + } + + if (PreCommit) { + Status = _zx_vmar_op_range(_zx_vmar_root_self(), ZX_VMAR_OP_COMMIT, MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); + } + + WindowBase = MapAddr; + WindowSize = Size; + return true; +} + +void MemMapFuchsia::unmapImpl(uptr Addr, uptr Size) { + zx_status_t Status; + + if (Size == WindowSize) { + // NOTE: Closing first and then unmapping seems slightly faster than doing + // the same operations in the opposite order. + Status = _zx_handle_close(Vmo); + CHECK_EQ(Status, ZX_OK); + Status = _zx_vmar_unmap(_zx_vmar_root_self(), Addr, Size); + CHECK_EQ(Status, ZX_OK); + + MapAddr = WindowBase = WindowSize = 0; + Vmo = ZX_HANDLE_INVALID; + } else { + // Unmap the subrange. + Status = _zx_vmar_unmap(_zx_vmar_root_self(), Addr, Size); + CHECK_EQ(Status, ZX_OK); + + // Decommit the pages that we just unmapped. + Status = _zx_vmo_op_range(Vmo, ZX_VMO_OP_DECOMMIT, Addr - MapAddr, Size, + nullptr, 0); + CHECK_EQ(Status, ZX_OK); + + if (Addr == WindowBase) + WindowBase += Size; + WindowSize -= Size; + } +} + +bool MemMapFuchsia::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + const bool PreCommit = !!(Flags & MAP_PRECOMMIT); + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + // NOTE: This will rename the *whole* VMO, not only the requested portion of + // it. But we cannot do better than this given the MemMap API. In practice, + // the upper layers of Scudo always pass the same Name for a given MemMap. 
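// Illustrative sketch (editor's example, not from the Scudo sources): the
// flag decoding and error policy the map/remap paths in this file follow.
// Out-of-memory is only a recoverable failure when the caller passed an
// "allow no-mem" flag; every other failure is fatal. MY_MAP_* and tryOsMap()
// are hypothetical stand-ins for Scudo's MAP_* flags and the platform call.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

using uptr = std::uintptr_t;
constexpr uptr MY_MAP_ALLOWNOMEM = 1U << 0;
constexpr uptr MY_MAP_NOACCESS = 1U << 1;

static bool tryOsMap(uptr Size, bool NoAccess, bool *OutOfMemory) {
  (void)Size;
  (void)NoAccess;
  *OutOfMemory = false;  // simulated success; a real version would map pages
  return true;
}

bool mapOrDie(uptr Size, uptr Flags) {
  const bool AllowNoMem = (Flags & MY_MAP_ALLOWNOMEM) != 0;
  const bool NoAccess = (Flags & MY_MAP_NOACCESS) != 0;
  bool OutOfMemory = false;
  if (tryOsMap(Size, NoAccess, &OutOfMemory))
    return true;
  if (OutOfMemory && AllowNoMem)
    return false;  // the caller opted in to handling OOM itself
  std::fprintf(stderr, "map of %zu bytes failed\n",
               static_cast<std::size_t>(Size));
  std::abort();  // any other failure is treated as fatal
}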
+ if (Name != nullptr) + setVmoName(Vmo, Name); + + uptr MappedAddr; + zx_vm_option_t MapFlags = ZX_VM_ALLOW_FAULTS | ZX_VM_SPECIFIC_OVERWRITE; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + zx_status_t Status = + _zx_vmar_map(_zx_vmar_root_self(), MapFlags, Addr - getRootVmarBase(), + Vmo, Addr - MapAddr, Size, &MappedAddr); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmar_map", Size); + return false; + } + DCHECK_EQ(Addr, MappedAddr); + + if (PreCommit) { + Status = _zx_vmar_op_range(_zx_vmar_root_self(), ZX_VMAR_OP_COMMIT, MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); + } + + return true; +} + +void MemMapFuchsia::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + zx_status_t Status = _zx_vmo_op_range(Vmo, ZX_VMO_OP_DECOMMIT, From - MapAddr, + Size, nullptr, 0); + CHECK_EQ(Status, ZX_OK); +} + +void MemMapFuchsia::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + const bool NoAccess = !!(Flags & MAP_NOACCESS); + + zx_vm_option_t MapFlags = 0; + if (!NoAccess) + MapFlags |= ZX_VM_PERM_READ | ZX_VM_PERM_WRITE; + zx_status_t Status = + _zx_vmar_protect(_zx_vmar_root_self(), MapFlags, Addr, Size); + CHECK_EQ(Status, ZX_OK); +} + +bool ReservedMemoryFuchsia::createImpl(UNUSED uptr Addr, uptr Size, + UNUSED const char *Name, uptr Flags) { + const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM); + + // Reserve memory by mapping the placeholder VMO without any permission. + zx_status_t Status = _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_ALLOW_FAULTS, 0, + getPlaceholderVmo(), 0, Size, &Base); + if (UNLIKELY(Status != ZX_OK)) { + if (Status != ZX_ERR_NO_MEMORY || !AllowNoMem) + dieOnError(Status, "zx_vmar_map", Size); + return false; + } + + Capacity = Size; + return true; +} + +void ReservedMemoryFuchsia::releaseImpl() { + zx_status_t Status = _zx_vmar_unmap(_zx_vmar_root_self(), Base, Capacity); + CHECK_EQ(Status, ZX_OK); +} + +ReservedMemoryFuchsia::MemMapT ReservedMemoryFuchsia::dispatchImpl(uptr Addr, + uptr Size) { + return ReservedMemoryFuchsia::MemMapT(Addr, Size); +} + +} // namespace scudo + +#endif // SCUDO_FUCHSIA diff --git a/third_party/llvm-scudo-standalone/mem_map_fuchsia.h b/third_party/llvm-scudo-standalone/mem_map_fuchsia.h new file mode 100644 index 0000000..2e66f89 --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map_fuchsia.h @@ -0,0 +1,75 @@ +//===-- mem_map_fuchsia.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_FUCHSIA_H_ +#define SCUDO_MEM_MAP_FUCHSIA_H_ + +#include "mem_map_base.h" + +#if SCUDO_FUCHSIA + +#include +#include + +namespace scudo { + +class MemMapFuchsia final : public MemMapBase { +public: + constexpr MemMapFuchsia() = default; + + // Impls for base functions. 
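// Illustrative sketch (editor's example, not from the Scudo sources): the
// CRTP dispatch that MemMapBase and ReservedMemory use. The base class
// forwards to the derived class's *Impl member through a member-function
// pointer, so the Impl functions listed below are reached without any
// virtual-call overhead. MapBaseSketch/MapSketch are made-up names.
#include <cstdint>

template <class Derived> class MapBaseSketch {
public:
  std::uintptr_t getBase() { return invokeImpl(&Derived::getBaseImpl); }

protected:
  template <typename R, typename... Args>
  R invokeImpl(R (Derived::*MemFn)(Args...), Args... args) {
    return (static_cast<Derived *>(this)->*MemFn)(args...);
  }
};

class MapSketch final : public MapBaseSketch<MapSketch> {
public:
  std::uintptr_t getBaseImpl() { return Base; }

private:
  std::uintptr_t Base = 0x1000;
};
// Usage: MapSketch M; auto B = M.getBase();  // resolves to MapSketch::getBaseImpl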
+ bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return WindowBase; } + uptr getCapacityImpl() { return WindowSize; } + +private: + friend class ReservedMemoryFuchsia; + + // Used by ReservedMemoryFuchsia::dispatch. + MemMapFuchsia(uptr Base, uptr Capacity); + + // Virtual memory address corresponding to VMO offset 0. + uptr MapAddr = 0; + + // Virtual memory base address and size of the VMO subrange that is still in + // use. unmapImpl() can shrink this range, either at the beginning or at the + // end. + uptr WindowBase = 0; + uptr WindowSize = 0; + + zx_handle_t Vmo = ZX_HANDLE_INVALID; +}; + +class ReservedMemoryFuchsia final + : public ReservedMemory { +public: + constexpr ReservedMemoryFuchsia() = default; + + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + uptr getBaseImpl() { return Base; } + uptr getCapacityImpl() { return Capacity; } + +private: + uptr Base = 0; + uptr Capacity = 0; +}; + +} // namespace scudo + +#endif // SCUDO_FUCHSIA + +#endif // SCUDO_MEM_MAP_FUCHSIA_H_ diff --git a/third_party/llvm-scudo-standalone/mem_map_linux.cpp b/third_party/llvm-scudo-standalone/mem_map_linux.cpp new file mode 100644 index 0000000..783c4f0 --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map_linux.cpp @@ -0,0 +1,153 @@ +//===-- mem_map_linux.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX + +#include "mem_map_linux.h" + +#include "common.h" +#include "internal_defs.h" +#include "linux.h" +#include "mutex.h" +#include "report_linux.h" +#include "string_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if SCUDO_ANDROID +// TODO(chiahungduan): Review if we still need the followings macros. +#include +// Definitions of prctl arguments to set a vma name in Android kernels. +#define ANDROID_PR_SET_VMA 0x53564d41 +#define ANDROID_PR_SET_VMA_ANON_NAME 0 +#endif + +namespace scudo { + +static void *mmapWrapper(uptr Addr, uptr Size, const char *Name, uptr Flags) { + int MmapFlags = MAP_PRIVATE | MAP_ANONYMOUS; + int MmapProt; + if (Flags & MAP_NOACCESS) { + MmapFlags |= MAP_NORESERVE; + MmapProt = PROT_NONE; + } else { + MmapProt = PROT_READ | PROT_WRITE; + } +#if defined(__aarch64__) +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif + if (Flags & MAP_MEMTAG) + MmapProt |= PROT_MTE; +#endif + if (Addr) + MmapFlags |= MAP_FIXED; + void *P = + mmap(reinterpret_cast(Addr), Size, MmapProt, MmapFlags, -1, 0); + if (P == MAP_FAILED) { + if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM) + reportMapError(errno == ENOMEM ? 
Size : 0); + return nullptr; + } +#if SCUDO_ANDROID + if (Name) + prctl(ANDROID_PR_SET_VMA, ANDROID_PR_SET_VMA_ANON_NAME, P, Size, Name); +#else + (void)Name; +#endif + + return P; +} + +bool MemMapLinux::mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags) { + void *P = mmapWrapper(Addr, Size, Name, Flags); + if (P == nullptr) + return false; + + MapBase = reinterpret_cast(P); + MapCapacity = Size; + return true; +} + +void MemMapLinux::unmapImpl(uptr Addr, uptr Size) { + // If we unmap all the pages, also mark `MapBase` to 0 to indicate invalid + // status. + if (Size == MapCapacity) { + MapBase = MapCapacity = 0; + } else { + // This is partial unmap and is unmapping the pages from the beginning, + // shift `MapBase` to the new base. + if (MapBase == Addr) + MapBase = Addr + Size; + MapCapacity -= Size; + } + + if (munmap(reinterpret_cast(Addr), Size) != 0) + reportUnmapError(Addr, Size); +} + +bool MemMapLinux::remapImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + void *P = mmapWrapper(Addr, Size, Name, Flags); + if (reinterpret_cast(P) != Addr) + reportMapError(); + return true; +} + +void MemMapLinux::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) { + int Prot = (Flags & MAP_NOACCESS) ? PROT_NONE : (PROT_READ | PROT_WRITE); + if (mprotect(reinterpret_cast(Addr), Size, Prot) != 0) + reportProtectError(Addr, Size, Prot); +} + +void MemMapLinux::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) { + void *Addr = reinterpret_cast(From); + + while (madvise(Addr, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) { + } +} + +bool ReservedMemoryLinux::createImpl(uptr Addr, uptr Size, const char *Name, + uptr Flags) { + ReservedMemoryLinux::MemMapT MemMap; + if (!MemMap.map(Addr, Size, Name, Flags | MAP_NOACCESS)) + return false; + + MapBase = MemMap.getBase(); + MapCapacity = MemMap.getCapacity(); + + return true; +} + +void ReservedMemoryLinux::releaseImpl() { + if (munmap(reinterpret_cast(getBase()), getCapacity()) != 0) + reportUnmapError(getBase(), getCapacity()); +} + +ReservedMemoryLinux::MemMapT ReservedMemoryLinux::dispatchImpl(uptr Addr, + uptr Size) { + return ReservedMemoryLinux::MemMapT(Addr, Size); +} + +} // namespace scudo + +#endif // SCUDO_LINUX diff --git a/third_party/llvm-scudo-standalone/mem_map_linux.h b/third_party/llvm-scudo-standalone/mem_map_linux.h new file mode 100644 index 0000000..7a89b3b --- /dev/null +++ b/third_party/llvm-scudo-standalone/mem_map_linux.h @@ -0,0 +1,67 @@ +//===-- mem_map_linux.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_MEM_MAP_LINUX_H_ +#define SCUDO_MEM_MAP_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX + +#include "common.h" +#include "mem_map_base.h" + +namespace scudo { + +class MemMapLinux final : public MemMapBase { +public: + constexpr MemMapLinux() = default; + MemMapLinux(uptr Base, uptr Capacity) + : MapBase(Base), MapCapacity(Capacity) {} + + // Impls for base functions. 
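// Illustrative sketch (editor's example, not from the Scudo sources,
// Linux-only): the reserve/commit/release life cycle that the
// MemMapLinux/ReservedMemoryLinux code above implements. Address space is
// reserved with PROT_NONE, part of it is made usable, physical pages are
// handed back with MADV_DONTNEED, and the whole range is finally unmapped.
#include <sys/mman.h>
#include <unistd.h>
#include <cerrno>
#include <cstdlib>

int main() {
  const size_t PageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  const size_t Capacity = 16 * PageSize;

  // Reserve: no access and no swap reservation, like MAP_NOACCESS above.
  void *Base = mmap(nullptr, Capacity, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (Base == MAP_FAILED)
    return EXIT_FAILURE;

  // Commit the first four pages (roughly what remap/setMemoryPermission do).
  if (mprotect(Base, 4 * PageSize, PROT_READ | PROT_WRITE) != 0)
    return EXIT_FAILURE;
  static_cast<unsigned char *>(Base)[0] = 1;  // touch a committed page

  // Release the physical pages; later accesses see zero-filled pages again.
  while (madvise(Base, 4 * PageSize, MADV_DONTNEED) == -1 && errno == EAGAIN) {
  }

  munmap(Base, Capacity);
  return EXIT_SUCCESS;
}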
+ bool mapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags = 0); + void unmapImpl(uptr Addr, uptr Size); + bool remapImpl(uptr Addr, uptr Size, const char *Name, uptr Flags = 0); + void setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags); + void releasePagesToOSImpl(uptr From, uptr Size) { + return releaseAndZeroPagesToOSImpl(From, Size); + } + void releaseAndZeroPagesToOSImpl(uptr From, uptr Size); + uptr getBaseImpl() { return MapBase; } + uptr getCapacityImpl() { return MapCapacity; } + +private: + uptr MapBase = 0; + uptr MapCapacity = 0; +}; + +// This will be deprecated when every allocator has been supported by each +// platform's `MemMap` implementation. +class ReservedMemoryLinux final + : public ReservedMemory { +public: + // The following two are the Impls for function in `MemMapBase`. + uptr getBaseImpl() { return MapBase; } + uptr getCapacityImpl() { return MapCapacity; } + + // These threes are specific to `ReservedMemory`. + bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags); + void releaseImpl(); + MemMapT dispatchImpl(uptr Addr, uptr Size); + +private: + uptr MapBase = 0; + uptr MapCapacity = 0; +}; + +} // namespace scudo + +#endif // SCUDO_LINUX + +#endif // SCUDO_MEM_MAP_LINUX_H_ diff --git a/third_party/llvm-scudo-standalone/memtag.h b/third_party/llvm-scudo-standalone/memtag.h index c48e228..aaed219 100644 --- a/third_party/llvm-scudo-standalone/memtag.h +++ b/third_party/llvm-scudo-standalone/memtag.h @@ -11,19 +11,22 @@ #include "internal_defs.h" -#if SCUDO_LINUX +#if SCUDO_CAN_USE_MTE #include #include #endif namespace scudo { -#if (__clang_major__ >= 12 && defined(__aarch64__)) || defined(SCUDO_FUZZ) +#if (__clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__)) || \ + defined(SCUDO_FUZZ) // We assume that Top-Byte Ignore is enabled if the architecture supports memory // tagging. Not all operating systems enable TBI, so we only claim architectural // support for memory tagging if the operating system enables TBI. -#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) +// HWASan uses the top byte for its own purpose and Scudo should not touch it. 
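// Illustrative sketch (editor's example, not from the Scudo sources): the
// top-byte tag arithmetic behind untagPointer()/extractTag()/addFixedTag()
// when TBI/MTE is available. Assuming 64-bit addresses, the 4-bit tag sits in
// bits 56..59, so tagging and untagging are plain bit operations on the raw
// pointer value.
#include <cassert>
#include <cstdint>

using u64 = std::uint64_t;

constexpr u64 addTagSketch(u64 Ptr, u64 Tag) {
  return (Ptr & ~(u64(0xf) << 56)) | (Tag << 56);
}
constexpr unsigned extractTagSketch(u64 Ptr) {
  return static_cast<unsigned>((Ptr >> 56) & 0xf);
}
constexpr u64 untagSketch(u64 Ptr) {
  return Ptr & ~(u64(0xff) << 56);  // clear the whole top byte
}

int main() {
  const u64 P = 0x00007fffdeadb000ULL;
  const u64 Tagged = addTagSketch(P, 0xa);
  assert(extractTagSketch(Tagged) == 0xa);
  assert(untagSketch(Tagged) == P);
  return 0;
}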
+#if SCUDO_CAN_USE_MTE && !defined(SCUDO_DISABLE_TBI) && \ + !__has_feature(hwaddress_sanitizer) inline constexpr bool archSupportsMemoryTagging() { return true; } #else inline constexpr bool archSupportsMemoryTagging() { return false; } @@ -39,25 +42,25 @@ inline uint8_t extractTag(uptr Ptr) { return (Ptr >> 56) & 0xf; } inline constexpr bool archSupportsMemoryTagging() { return false; } -inline uptr archMemoryTagGranuleSize() { +inline NORETURN uptr archMemoryTagGranuleSize() { UNREACHABLE("memory tagging not supported"); } -inline uptr untagPointer(uptr Ptr) { +inline NORETURN uptr untagPointer(uptr Ptr) { (void)Ptr; UNREACHABLE("memory tagging not supported"); } -inline uint8_t extractTag(uptr Ptr) { +inline NORETURN uint8_t extractTag(uptr Ptr) { (void)Ptr; UNREACHABLE("memory tagging not supported"); } #endif -#if __clang_major__ >= 12 && defined(__aarch64__) +#if __clang_major__ >= 12 && defined(__aarch64__) && !defined(__ILP32__) -#if SCUDO_LINUX +#if SCUDO_CAN_USE_MTE inline bool systemSupportsMemoryTagging() { #ifndef HWCAP2_MTE @@ -91,9 +94,10 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() { #ifndef PR_MTE_TCF_MASK #define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) #endif - return (static_cast( - prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) & - PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE; + int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + if (res == -1) + return false; + return (static_cast(res) & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE; } inline void enableSystemMemoryTaggingTestOnly() { @@ -102,19 +106,19 @@ inline void enableSystemMemoryTaggingTestOnly() { 0, 0, 0); } -#else // !SCUDO_LINUX +#else // !SCUDO_CAN_USE_MTE inline bool systemSupportsMemoryTagging() { return false; } -inline bool systemDetectsMemoryTagFaultsTestOnly() { +inline NORETURN bool systemDetectsMemoryTagFaultsTestOnly() { UNREACHABLE("memory tagging not supported"); } -inline void enableSystemMemoryTaggingTestOnly() { +inline NORETURN void enableSystemMemoryTaggingTestOnly() { UNREACHABLE("memory tagging not supported"); } -#endif // SCUDO_LINUX +#endif // SCUDO_CAN_USE_MTE class ScopedDisableMemoryTagChecks { uptr PrevTCO; @@ -252,15 +256,15 @@ inline uptr loadTag(uptr Ptr) { #else -inline bool systemSupportsMemoryTagging() { +inline NORETURN bool systemSupportsMemoryTagging() { UNREACHABLE("memory tagging not supported"); } -inline bool systemDetectsMemoryTagFaultsTestOnly() { +inline NORETURN bool systemDetectsMemoryTagFaultsTestOnly() { UNREACHABLE("memory tagging not supported"); } -inline void enableSystemMemoryTaggingTestOnly() { +inline NORETURN void enableSystemMemoryTaggingTestOnly() { UNREACHABLE("memory tagging not supported"); } @@ -268,41 +272,44 @@ struct ScopedDisableMemoryTagChecks { ScopedDisableMemoryTagChecks() {} }; -inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { +inline NORETURN uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) { (void)Ptr; (void)ExcludeMask; UNREACHABLE("memory tagging not supported"); } -inline uptr addFixedTag(uptr Ptr, uptr Tag) { +inline NORETURN uptr addFixedTag(uptr Ptr, uptr Tag) { (void)Ptr; (void)Tag; UNREACHABLE("memory tagging not supported"); } -inline uptr storeTags(uptr Begin, uptr End) { +inline NORETURN uptr storeTags(uptr Begin, uptr End) { (void)Begin; (void)End; UNREACHABLE("memory tagging not supported"); } -inline void storeTag(uptr Ptr) { +inline NORETURN void storeTag(uptr Ptr) { (void)Ptr; UNREACHABLE("memory tagging not supported"); } -inline uptr loadTag(uptr Ptr) { +inline NORETURN uptr loadTag(uptr Ptr) { (void)Ptr; 
UNREACHABLE("memory tagging not supported"); } #endif +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-noreturn" inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask, uptr *TaggedBegin, uptr *TaggedEnd) { *TaggedBegin = selectRandomTag(reinterpret_cast(Ptr), ExcludeMask); *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size); } +#pragma GCC diagnostic pop inline void *untagPointer(void *Ptr) { return reinterpret_cast(untagPointer(reinterpret_cast(Ptr))); diff --git a/third_party/llvm-scudo-standalone/mutex.h b/third_party/llvm-scudo-standalone/mutex.h index c8504c0..4caa945 100644 --- a/third_party/llvm-scudo-standalone/mutex.h +++ b/third_party/llvm-scudo-standalone/mutex.h @@ -11,6 +11,7 @@ #include "atomic_helpers.h" #include "common.h" +#include "thread_annotations.h" #include @@ -20,10 +21,10 @@ namespace scudo { -class HybridMutex { +class CAPABILITY("mutex") HybridMutex { public: - bool tryLock(); - NOINLINE void lock() { + bool tryLock() TRY_ACQUIRE(true); + NOINLINE void lock() ACQUIRE() { if (LIKELY(tryLock())) return; // The compiler may try to fully unroll the loop, ending up in a @@ -34,17 +35,41 @@ class HybridMutex { #pragma nounroll #endif for (u8 I = 0U; I < NumberOfTries; I++) { - yieldProcessor(NumberOfYields); + delayLoop(); if (tryLock()) return; } lockSlow(); } - void unlock(); + void unlock() RELEASE(); + + // TODO(chiahungduan): In general, we may want to assert the owner of lock as + // well. Given the current uses of HybridMutex, it's acceptable without + // asserting the owner. Re-evaluate this when we have certain scenarios which + // requires a more fine-grained lock granularity. + ALWAYS_INLINE void assertHeld() ASSERT_CAPABILITY(this) { + if (SCUDO_DEBUG) + assertHeldImpl(); + } private: - static constexpr u8 NumberOfTries = 8U; - static constexpr u8 NumberOfYields = 8U; + void delayLoop() { + // The value comes from the average time spent in accessing caches (which + // are the fastest operations) so that we are unlikely to wait too long for + // fast operations. + constexpr u32 SpinTimes = 16; + volatile u32 V = 0; + for (u32 I = 0; I < SpinTimes; ++I) { + u32 Tmp = V + 1; + V = Tmp; + } + } + + void assertHeldImpl(); + + // TODO(chiahungduan): Adapt this value based on scenarios. E.g., primary and + // secondary allocator have different allocation times. 
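// Illustrative sketch (editor's example, not from the Scudo sources): the
// bounded spin-then-block strategy HybridMutex::lock() follows with
// NumberOfTries and delayLoop(). A std::mutex stands in for the futex/zircon
// primitive used by the real lockSlow() path.
#include <mutex>

class SpinThenBlockLock {
public:
  bool tryLock() { return M.try_lock(); }
  void lock() {
    if (tryLock())
      return;
    for (unsigned I = 0; I < NumberOfTries; ++I) {
      delayLoop();
      if (tryLock())
        return;  // fast path: picked the lock up while spinning
    }
    M.lock();  // slow path: block instead of burning more CPU
  }
  void unlock() { M.unlock(); }

private:
  static void delayLoop() {
    // A short, cheap busy-wait between attempts, mirroring the loop above.
    volatile unsigned V = 0;
    for (unsigned I = 0; I < 16; ++I) {
      unsigned Tmp = V + 1;
      V = Tmp;
    }
  }
  static constexpr unsigned NumberOfTries = 32;
  std::mutex M;
};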
+ static constexpr u8 NumberOfTries = 32U; #if SCUDO_LINUX atomic_u32 M = {}; @@ -52,13 +77,13 @@ class HybridMutex { sync_mutex_t M = {}; #endif - void lockSlow(); + void lockSlow() ACQUIRE(); }; -class ScopedLock { +class SCOPED_CAPABILITY ScopedLock { public: - explicit ScopedLock(HybridMutex &M) : Mutex(M) { Mutex.lock(); } - ~ScopedLock() { Mutex.unlock(); } + explicit ScopedLock(HybridMutex &M) ACQUIRE(M) : Mutex(M) { Mutex.lock(); } + ~ScopedLock() RELEASE() { Mutex.unlock(); } private: HybridMutex &Mutex; diff --git a/third_party/llvm-scudo-standalone/options.h b/third_party/llvm-scudo-standalone/options.h index 4e67865..b20142a 100644 --- a/third_party/llvm-scudo-standalone/options.h +++ b/third_party/llvm-scudo-standalone/options.h @@ -38,7 +38,7 @@ struct Options { } }; -template bool useMemoryTagging(Options Options) { +template bool useMemoryTagging(const Options &Options) { return allocatorSupportsMemoryTagging() && Options.get(OptionBit::UseMemoryTagging); } diff --git a/third_party/llvm-scudo-standalone/platform.h b/third_party/llvm-scudo-standalone/platform.h index 36378d1..b71a86b 100644 --- a/third_party/llvm-scudo-standalone/platform.h +++ b/third_party/llvm-scudo-standalone/platform.h @@ -37,7 +37,13 @@ #define SCUDO_TRUSTY 0 #endif -#if __LP64__ +#if defined(__riscv) && (__riscv_xlen == 64) +#define SCUDO_RISCV64 1 +#else +#define SCUDO_RISCV64 0 +#endif + +#if defined(__LP64__) #define SCUDO_WORDSIZE 64U #else #define SCUDO_WORDSIZE 32U @@ -53,6 +59,24 @@ #define SCUDO_CAN_USE_PRIMARY64 (SCUDO_WORDSIZE == 64U) #endif +#ifndef SCUDO_CAN_USE_MTE +#define SCUDO_CAN_USE_MTE (SCUDO_LINUX || SCUDO_TRUSTY) +#endif + +// Use smaller table sizes for fuzzing in order to reduce input size. +// Trusty just has less available memory. +#ifndef SCUDO_SMALL_STACK_DEPOT +#if defined(SCUDO_FUZZ) || SCUDO_TRUSTY +#define SCUDO_SMALL_STACK_DEPOT 1 +#else +#define SCUDO_SMALL_STACK_DEPOT 0 +#endif +#endif + +#ifndef SCUDO_ENABLE_HOOKS +#define SCUDO_ENABLE_HOOKS 0 +#endif + #ifndef SCUDO_MIN_ALIGNMENT_LOG // We force malloc-type functions to be aligned to std::max_align_t, but there // is no reason why the minimum alignment for all other functions can't be 8 diff --git a/third_party/llvm-scudo-standalone/primary32.h b/third_party/llvm-scudo-standalone/primary32.h index 326c10a..4d03b28 100644 --- a/third_party/llvm-scudo-standalone/primary32.h +++ b/third_party/llvm-scudo-standalone/primary32.h @@ -9,6 +9,7 @@ #ifndef SCUDO_PRIMARY32_H_ #define SCUDO_PRIMARY32_H_ +#include "allocator_common.h" #include "bytemap.h" #include "common.h" #include "list.h" @@ -18,6 +19,7 @@ #include "report.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -41,26 +43,32 @@ namespace scudo { template class SizeClassAllocator32 { public: - typedef typename Config::PrimaryCompactPtrT CompactPtrT; - typedef typename Config::SizeClassMap SizeClassMap; + typedef typename Config::Primary::CompactPtrT CompactPtrT; + typedef typename Config::Primary::SizeClassMap SizeClassMap; + static const uptr GroupSizeLog = Config::Primary::GroupSizeLog; // The bytemap can only track UINT8_MAX - 1 classes. static_assert(SizeClassMap::LargestClassId <= (UINT8_MAX - 1), ""); // Regions should be large enough to hold the largest Block. 
- static_assert((1UL << Config::PrimaryRegionSizeLog) >= SizeClassMap::MaxSize, + static_assert((1UL << Config::Primary::RegionSizeLog) >= + SizeClassMap::MaxSize, ""); typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache CacheT; - typedef typename CacheT::TransferBatch TransferBatch; + typedef TransferBatch TransferBatchT; + typedef BatchGroup BatchGroupT; + + static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT), + "BatchGroupT uses the same class size as TransferBatchT"); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? sizeof(TransferBatch) + ? sizeof(TransferBatchT) : SizeClassMap::getSizeByClassId(ClassId); } static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { if (SCUDO_FUCHSIA) reportError("SizeClassAllocator32 is not supported on Fuchsia"); @@ -70,7 +78,7 @@ template class SizeClassAllocator32 { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); PossibleRegions.init(); u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) Seed = static_cast( Time ^ (reinterpret_cast(SizeClassInfoArray) >> 6)); @@ -85,24 +93,77 @@ template class SizeClassAllocator32 { } void unmapTestOnly() { - while (NumberOfStashedRegions > 0) - unmap(reinterpret_cast(RegionsStash[--NumberOfStashedRegions]), - RegionSize); + { + ScopedLock L(RegionsStashMutex); + while (NumberOfStashedRegions > 0) { + unmap(reinterpret_cast(RegionsStash[--NumberOfStashedRegions]), + RegionSize); + } + } + uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); if (Sci->MinRegionIndex < MinRegionIndex) MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) MaxRegionIndex = Sci->MaxRegionIndex; *Sci = {}; } - for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++) + + ScopedLock L(ByteMapMutex); + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) if (PossibleRegions[I]) unmap(reinterpret_cast(I * RegionSize), RegionSize); PossibleRegions.unmapTestOnly(); } + // When all blocks are freed, it has to be the same size as `AllocatedUser`. + void verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`. 
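// Illustrative sketch (editor's example, not from the Scudo sources; plain
// containers instead of the intrusive lists): the accounting invariant that
// the test-only verification around here checks. Once every block has been
// returned, the blocks reachable through the batch groups must add up to
// exactly AllocatedUser / BlockSize, and the pushed/popped counters balance.
#include <cassert>
#include <cstddef>
#include <vector>

struct BatchSketch {
  std::vector<void *> Blocks;
};
struct BatchGroupSketch {
  std::vector<BatchSketch> Batches;
};

void verifyAllFree(const std::vector<BatchGroupSketch> &Groups,
                   std::size_t AllocatedUser, std::size_t BlockSize,
                   std::size_t Pushed, std::size_t Popped) {
  std::size_t TotalBlocks = 0;
  for (const BatchGroupSketch &BG : Groups)
    for (const BatchSketch &B : BG.Batches)
      TotalBlocks += B.Blocks.size();
  assert(TotalBlocks == AllocatedUser / BlockSize);
  assert(Pushed == Popped);
}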
+ BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } + + const uptr BlockSize = getSizeByClassId(I); + DCHECK_EQ(TotalBlocks, Sci->AllocatedUser / BlockSize); + DCHECK_EQ(Sci->FreeListInfo.PushedBlocks, Sci->FreeListInfo.PoppedBlocks); + } + + SizeClassInfo *Sci = getSizeClassInfo(SizeClassMap::BatchClassId); + ScopedLock L1(Sci->Mutex); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } else { + // `BatchGroup` with empty freelist doesn't have `TransferBatch` record + // itself. + ++TotalBlocks; + } + } + + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Sci->AllocatedUser / BlockSize); + const uptr BlocksInUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); + } + CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const { return static_cast(Ptr); } @@ -111,35 +172,95 @@ template class SizeClassAllocator32 { return reinterpret_cast(static_cast(CompactPtr)); } - TransferBatch *popBatch(CacheT *C, uptr ClassId) { + uptr compactPtrGroupBase(CompactPtrT CompactPtr) { + const uptr Mask = (static_cast(1) << GroupSizeLog) - 1; + return CompactPtr & ~Mask; + } + + uptr decompactGroupBase(uptr CompactPtrGroupBase) { + return CompactPtrGroupBase; + } + + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; + } + + ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize > PageSize; + } + + // Note that the `MaxBlockCount` will be used when we support arbitrary blocks + // count. Now it's the same as the number of blocks stored in the + // `TransferBatch`. + u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, + UNUSED const u16 MaxBlockCount) { + TransferBatchT *B = popBatch(C, ClassId); + if (!B) + return 0; + + const u16 Count = B->getCount(); + DCHECK_GT(Count, 0U); + B->moveToArray(ToArray); + + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, B); + + return Count; + } + + TransferBatchT *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); SizeClassInfo *Sci = getSizeClassInfo(ClassId); ScopedLock L(Sci->Mutex); - TransferBatch *B = Sci->FreeList.front(); - if (B) { - Sci->FreeList.pop_front(); - } else { - B = populateFreeList(C, ClassId, Sci); - if (UNLIKELY(!B)) + TransferBatchT *B = popBatchImpl(C, ClassId, Sci); + if (UNLIKELY(!B)) { + if (UNLIKELY(!populateFreeList(C, ClassId, Sci))) return nullptr; + B = popBatchImpl(C, ClassId, Sci); + // if `populateFreeList` succeeded, we are supposed to get free blocks. + DCHECK_NE(B, nullptr); } - DCHECK_GT(B->getCount(), 0); - Sci->Stats.PoppedBlocks += B->getCount(); return B; } - void pushBatch(uptr ClassId, TransferBatch *B) { + // Push the array of free blocks to the designated batch group. 
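// Illustrative sketch (editor's example, not from the Scudo sources): the
// grouping step the pushBlocks() implementation below performs. Each block is
// keyed by the 2^GroupSizeLog-aligned "group base" of its compact address,
// and the incoming array is insertion-sorted by that key so that blocks of
// the same group become adjacent. GroupSizeLogSketch is an arbitrary value
// chosen for the example.
#include <cstdint>

using u64 = std::uint64_t;
constexpr unsigned GroupSizeLogSketch = 18;

constexpr u64 groupBase(u64 CompactPtr) {
  return CompactPtr & ~((u64(1) << GroupSizeLogSketch) - 1);
}

// Stable insertion sort by group base. Returns true when every entry already
// shares one group, which is the SameGroup fast path taken below.
bool sortByGroup(u64 *Array, unsigned Size) {
  bool SameGroup = true;
  for (unsigned I = 1; I < Size; ++I) {
    if (groupBase(Array[I - 1]) != groupBase(Array[I]))
      SameGroup = false;
    const u64 Cur = Array[I];
    unsigned J = I;
    while (J > 0 && groupBase(Cur) < groupBase(Array[J - 1])) {
      Array[J] = Array[J - 1];
      --J;
    }
    Array[J] = Cur;
  }
  return SameGroup;
}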
+ void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) { DCHECK_LT(ClassId, NumClasses); - DCHECK_GT(B->getCount(), 0); + DCHECK_GT(Size, 0); + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + ScopedLock L(Sci->Mutex); + pushBatchClassBlocks(Sci, Array, Size); + return; + } + + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. + + // Sort the blocks so that blocks belonging to the same group can be pushed + // together. + bool SameGroup = true; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && + compactPtrGroupBase(Cur) < compactPtrGroupBase(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; + } + ScopedLock L(Sci->Mutex); - Sci->FreeList.push_front(B); - Sci->Stats.PushedBlocks += B->getCount(); - if (ClassId != SizeClassMap::BatchClassId) - releaseToOSMaybe(Sci, ClassId); + pushBlocksImpl(C, ClassId, Sci, Array, Size, SameGroup); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { if (static_cast(I) == SizeClassMap::BatchClassId) @@ -148,11 +269,11 @@ template class SizeClassAllocator32 { } getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.lock(); RegionsStashMutex.lock(); - PossibleRegions.disable(); + ByteMapMutex.lock(); } - void enable() { - PossibleRegions.enable(); + void enable() NO_THREAD_SAFETY_ANALYSIS { + ByteMapMutex.unlock(); RegionsStashMutex.unlock(); getSizeClassInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); for (uptr I = 0; I < NumClasses; I++) { @@ -166,12 +287,20 @@ template class SizeClassAllocator32 { uptr MinRegionIndex = NumRegions, MaxRegionIndex = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator32. We may consider locking each region on demand + // only. + Sci->Mutex.assertHeld(); if (Sci->MinRegionIndex < MinRegionIndex) MinRegionIndex = Sci->MinRegionIndex; if (Sci->MaxRegionIndex > MaxRegionIndex) MaxRegionIndex = Sci->MaxRegionIndex; } - for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) + + // SizeClassAllocator32 is disabled, i.e., ByteMapMutex is held. 
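// Illustrative sketch (editor's example, not from the Scudo sources): the
// Clang Thread Safety Analysis annotations this update threads through
// mutex.h and the primaries (CAPABILITY, ACQUIRE/RELEASE, GUARDED_BY,
// REQUIRES, assertHeld). With -Wthread-safety, clang flags any access to
// Counter made without holding Mu. The raw __attribute__ spellings below are
// what those macros commonly expand to; the real code goes through
// thread_annotations.h.
#include <mutex>

class __attribute__((capability("mutex"))) AnnotatedMutex {
public:
  void lock() __attribute__((acquire_capability())) { M.lock(); }
  void unlock() __attribute__((release_capability())) { M.unlock(); }

private:
  std::mutex M;
};

class Stats {
  AnnotatedMutex Mu;
  int Counter __attribute__((guarded_by(Mu))) = 0;

public:
  void increment() {
    Mu.lock();
    ++Counter;  // OK: every path that touches Counter holds Mu.
    Mu.unlock();
  }
  int read() __attribute__((requires_capability(Mu))) { return Counter; }
};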
+ ByteMapMutex.assertHeld(); + + for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) { if (PossibleRegions[I] && (PossibleRegions[I] - 1U) != SizeClassMap::BatchClassId) { const uptr BlockSize = getSizeByClassId(PossibleRegions[I] - 1U); @@ -180,6 +309,7 @@ template class SizeClassAllocator32 { for (uptr Block = From; Block < To; Block += BlockSize) Callback(Block); } + } } void getStats(ScopedString *Str) { @@ -189,22 +319,38 @@ template class SizeClassAllocator32 { uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); TotalMapped += Sci->AllocatedUser; - PoppedBlocks += Sci->Stats.PoppedBlocks; - PushedBlocks += Sci->Stats.PushedBlocks; + PoppedBlocks += Sci->FreeListInfo.PoppedBlocks; + PushedBlocks += Sci->FreeListInfo.PushedBlocks; } Str->append("Stats: SizeClassAllocator32: %zuM mapped in %zu allocations; " "remains %zu\n", TotalMapped >> 20, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getStats(Str, I, Sci); + } + } + + void getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator32: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + SizeClassInfo *Sci = getSizeClassInfo(I); + ScopedLock L(Sci->Mutex); + getSizeClassFragmentationInfo(Sci, I, Str); + } } bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = Max( - Min(static_cast(Value), Config::PrimaryMaxReleaseToOsIntervalMs), - Config::PrimaryMinReleaseToOsIntervalMs); + const s32 Interval = Max(Min(static_cast(Value), + Config::Primary::MaxReleaseToOsIntervalMs), + Config::Primary::MinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -212,14 +358,22 @@ template class SizeClassAllocator32 { return true; } - uptr releaseToOS() { + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { + SizeClassInfo *Sci = getSizeClassInfo(ClassId); + // TODO: Once we have separate locks like primary64, we may consider using + // tryLock() as well. + ScopedLock L(Sci->Mutex); + return releaseToOSMaybe(Sci, ClassId, ReleaseType); + } + + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; SizeClassInfo *Sci = getSizeClassInfo(I); ScopedLock L(Sci->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true); + TotalReleasedBytes += releaseToOSMaybe(Sci, I, ReleaseType); } return TotalReleasedBytes; } @@ -236,42 +390,42 @@ template class SizeClassAllocator32 { private: static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; + static const uptr RegionSize = 1UL << Config::Primary::RegionSizeLog; static const uptr NumRegions = - SCUDO_MMAP_RANGE_SIZE >> Config::PrimaryRegionSizeLog; + SCUDO_MMAP_RANGE_SIZE >> Config::Primary::RegionSizeLog; static const u32 MaxNumBatches = SCUDO_ANDROID ? 
4U : 8U; typedef FlatByteMap ByteMap; - struct SizeClassStats { - uptr PoppedBlocks; - uptr PushedBlocks; - }; - struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; }; + struct BlocksInfo { + SinglyLinkedList BlockList = {}; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + }; + struct alignas(SCUDO_CACHE_LINE_SIZE) SizeClassInfo { HybridMutex Mutex; - SinglyLinkedList FreeList; - uptr CurrentRegion; - uptr CurrentRegionAllocated; - SizeClassStats Stats; + BlocksInfo FreeListInfo GUARDED_BY(Mutex); + uptr CurrentRegion GUARDED_BY(Mutex); + uptr CurrentRegionAllocated GUARDED_BY(Mutex); u32 RandState; - uptr AllocatedUser; + uptr AllocatedUser GUARDED_BY(Mutex); // Lowest & highest region index allocated for this size class, to avoid // looping through the whole NumRegions. - uptr MinRegionIndex; - uptr MaxRegionIndex; - ReleaseToOsInfo ReleaseInfo; + uptr MinRegionIndex GUARDED_BY(Mutex); + uptr MaxRegionIndex GUARDED_BY(Mutex); + ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex); }; static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { - const uptr Id = Mem >> Config::PrimaryRegionSizeLog; + const uptr Id = Mem >> Config::Primary::RegionSizeLog; CHECK_LT(Id, NumRegions); return Id; } @@ -291,17 +445,22 @@ template class SizeClassAllocator32 { else MapSize = RegionSize; } else { - Region = roundUpTo(MapBase, RegionSize); + Region = roundUp(MapBase, RegionSize); unmap(reinterpret_cast(MapBase), Region - MapBase); MapSize = RegionSize; } const uptr End = Region + MapSize; if (End != MapEnd) unmap(reinterpret_cast(End), MapEnd - End); + + DCHECK_EQ(Region % RegionSize, 0U); + static_assert(Config::Primary::RegionSizeLog == GroupSizeLog, + "Memory group should be the same size as Region"); + return Region; } - uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) { + uptr allocateRegion(SizeClassInfo *Sci, uptr ClassId) REQUIRES(Sci->Mutex) { DCHECK_LT(ClassId, NumClasses); uptr Region = 0; { @@ -318,6 +477,7 @@ template class SizeClassAllocator32 { Sci->MinRegionIndex = RegionIndex; if (RegionIndex > Sci->MaxRegionIndex) Sci->MaxRegionIndex = RegionIndex; + ScopedLock L(ByteMapMutex); PossibleRegions.set(RegionIndex, static_cast(ClassId + 1U)); } return Region; @@ -328,8 +488,295 @@ template class SizeClassAllocator32 { return &SizeClassInfoArray[ClassId]; } - NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, - SizeClassInfo *Sci) { + void pushBatchClassBlocks(SizeClassInfo *Sci, CompactPtrT *Array, u32 Size) + REQUIRES(Sci->Mutex) { + DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by TransferBatch in freelist for all + // size-classes. In addition, TransferBatch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A TransferBatch records the + // block address of itself. See the figure below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the TransferBatch, the block used + // by TransferBatch is also free for use. We don't need to recycle the + // TransferBatch. 
Note that the correctness is maintained by the invariant, + // + // The unit of each popBatch() request is entire TransferBatch. Return + // part of the blocks in a TransferBatch is invalid. + // + // This ensures that TransferBatch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the TransferBatch too. To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last TransferBatch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. + BG = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. + BG->CompactPtrGroupBase = 0; + // `BG` is also the block of BatchClassId. Note that this is different + // from `CreateGroup` in `pushBlocksImpl` + BG->PushedBlocks = 1; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId)); + + Sci->FreeListInfo.BlockList.push_front(BG); + } + + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `TransferBatch` on the last element. + TransferBatchT *TB = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are + // recorded in the TransferBatch. + TB->add(Array[Size - 1]); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); + --Size; + DCHECK_EQ(BG->PushedBlocks, 1U); + // `TB` is also the block of BatchClassId. + BG->PushedBlocks += 1; + BG->Batches.push_front(TB); + } + + TransferBatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1); + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + } + // Push the blocks to their batch group. The layout will be like, + // + // FreeListInfo.BlockList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB + // + // Each BlockGroup(BG) will associate with unique group id and the free blocks + // are managed by a list of TransferBatch(TB). 
To reduce the time of inserting + // blocks, BGs are sorted and the input `Array` are supposed to be sorted so + // that we can get better performance of maintaining sorted property. + // Use `SameGroup=true` to indicate that all blocks in the array are from the + // same group then we will skip checking the group id of each block. + // + // The region mutex needs to be held while calling this method. + void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Sci->Mutex) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = + reinterpret_cast(C->getBatchClassBlock()); + BG->Batches.clear(); + TransferBatchT *TB = + reinterpret_cast(C->getBatchClassBlock()); + TB->clear(); + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + BG->Batches.push_front(TB); + BG->PushedBlocks = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId)); + + return BG; + }; + + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList &Batches = BG->Batches; + TransferBatchT *CurBatch = Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = + reinterpret_cast(C->getBatchClassBlock()); + CurBatch->clear(); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + }; + + Sci->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Sci->FreeListInfo.BlockList.front(); + + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. + BatchGroupT *Prev = nullptr; + + while (Cur != nullptr && + compactPtrGroupBase(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroupBase(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[0])); + if (Prev == nullptr) + Sci->FreeListInfo.BlockList.push_front(Cur); + else + Sci->FreeListInfo.BlockList.insert(Prev, Cur); + } + + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroupBase(Array[I]), Cur->CompactPtrGroupBase); + + InsertBlocks(Cur, Array, Size); + return; + } + + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. 
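// Illustrative sketch (editor's example, not from the Scudo sources): the run
// detection used right below. Walk the sorted array once, extend the current
// run while the key (the group base) stays the same, and hand each completed
// run to its group in a single call instead of inserting blocks one at a time.
#include <cstdint>

using u64 = std::uint64_t;

template <typename KeyFn, typename FlushFn>
void forEachRun(const u64 *Array, unsigned Size, KeyFn Key, FlushFn Flush) {
  if (Size == 0)
    return;
  unsigned Count = 1;
  for (unsigned I = 1; I < Size; ++I) {
    if (Key(Array[I - 1]) != Key(Array[I])) {
      Flush(Array + I - Count, Count);  // run [I - Count, I) is complete
      Count = 1;
    } else {
      ++Count;
    }
  }
  Flush(Array + Size - Count, Count);  // flush the final run
}
// Usage: forEachRun(A, N, [](u64 V) { return V >> 18; },
//                   [](const u64 *Run, unsigned Len) { /* push the run */ });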
+ u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroupBase(Array[I - 1]) != compactPtrGroupBase(Array[I])) { + DCHECK_EQ(compactPtrGroupBase(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroupBase(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroupBase(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroupBase(Array[I])); + DCHECK_NE(Prev, nullptr); + Sci->FreeListInfo.BlockList.insert(Prev, Cur); + } + + Count = 1; + } else { + ++Count; + } + } + + InsertBlocks(Cur, Array + Size - Count, Count); + } + + // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest + // group id will be considered first. + // + // The region mutex needs to be held while calling this method. + TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { + if (Sci->FreeListInfo.BlockList.empty()) + return nullptr; + + SinglyLinkedList &Batches = + Sci->FreeListInfo.BlockList.front()->Batches; + + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `TransferBatch` with single block. + TransferBatchT *TB = reinterpret_cast(BG); + TB->clear(); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB))); + Sci->FreeListInfo.PoppedBlocks += 1; + return TB; + } + + TransferBatchT *B = Batches.front(); + Batches.pop_front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + if (Batches.empty()) { + BatchGroupT *BG = Sci->FreeListInfo.BlockList.front(); + Sci->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking while + // allocating. Note that block used by constructing BatchGroup is recorded + // as free blocks in the last element of BatchGroup::Batches. Which means, + // once we pop the last TransferBatch, the block is implicitly + // deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } + + Sci->FreeListInfo.PoppedBlocks += B->getCount(); + return B; + } + + NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { uptr Region; uptr Offset; // If the size-class currently has a region associated to it, use it. The @@ -344,14 +791,14 @@ template class SizeClassAllocator32 { DCHECK_EQ(Sci->CurrentRegionAllocated, 0U); Region = allocateRegion(Sci, ClassId); if (UNLIKELY(!Region)) - return nullptr; + return false; C->getStats().add(StatMapped, RegionSize); Sci->CurrentRegion = Region; Offset = 0; } const uptr Size = getSizeByClassId(ClassId); - const u32 MaxCount = TransferBatch::getMaxCached(Size); + const u16 MaxCount = CacheT::getMaxCached(Size); DCHECK_GT(MaxCount, 0U); // The maximum number of blocks we should carve in the region is dictated // by the maximum number of batches we want to fill, and the amount of @@ -364,7 +811,7 @@ template class SizeClassAllocator32 { DCHECK_GT(NumberOfBlocks, 0U); constexpr u32 ShuffleArraySize = - MaxNumBatches * TransferBatch::MaxNumCached; + MaxNumBatches * TransferBatchT::MaxNumCached; // Fill the transfer batches and put them in the size-class freelist. 
We // need to randomize the blocks for security purposes, so we first fill a // local array that we then shuffle before populating the batches. @@ -374,23 +821,34 @@ template class SizeClassAllocator32 { uptr P = Region + Offset; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) ShuffleArray[I] = reinterpret_cast(P); - // No need to shuffle the batches size class. - if (ClassId != SizeClassMap::BatchClassId) - shuffle(ShuffleArray, NumberOfBlocks, &Sci->RandState); - for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = - C->createBatch(ClassId, reinterpret_cast(ShuffleArray[I])); - if (UNLIKELY(!B)) - return nullptr; - const u32 N = Min(MaxCount, NumberOfBlocks - I); - B->setFromArray(&ShuffleArray[I], N); - Sci->FreeList.push_back(B); - I += N; + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroupBase(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroupBase(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Sci->RandState); + pushBlocksImpl(C, ClassId, Sci, ShuffleArray + I - N, N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroupBase(ShuffleArray[I]); + } else { + ++N; + } + } + + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Sci->RandState); + pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks); } - TransferBatch *B = Sci->FreeList.front(); - Sci->FreeList.pop_front(); - DCHECK(B); - DCHECK_GT(B->getCount(), 0); + + // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record + // the requests from `PushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeList` is the internal interface so we should set + // the values back to avoid incorrectly setting the stats. 
+ Sci->FreeListInfo.PushedBlocks -= NumberOfBlocks; const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); @@ -406,60 +864,105 @@ template class SizeClassAllocator32 { } Sci->AllocatedUser += AllocatedUser; - return B; + return true; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - SizeClassInfo *Sci = getSizeClassInfo(ClassId); + void getStats(ScopedString *Str, uptr ClassId, SizeClassInfo *Sci) + REQUIRES(Sci->Mutex) { if (Sci->AllocatedUser == 0) return; - const uptr InUse = Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks; - const uptr AvailableChunks = Sci->AllocatedUser / getSizeByClassId(ClassId); + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUse = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = Sci->AllocatedUser - InUse * BlockSize; + uptr PushedBytesDelta = 0; + if (BytesInFreeList >= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr AvailableChunks = Sci->AllocatedUser / BlockSize; Str->append(" %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu avail: %6zu rss: %6zuK releases: %6zu\n", + "inuse: %6zu avail: %6zu releases: %6zu last released: %6zuK " + "latest pushed bytes: %6zuK\n", ClassId, getSizeByClassId(ClassId), Sci->AllocatedUser >> 10, - Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks, InUse, - AvailableChunks, Rss >> 10, Sci->ReleaseInfo.RangesReleased); + Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks, + InUse, AvailableChunks, Sci->ReleaseInfo.RangesReleased, + Sci->ReleaseInfo.LastReleasedBytes >> 10, + PushedBytesDelta >> 10); } - NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, - bool Force = false) { + void getSizeClassFragmentationInfo(SizeClassInfo *Sci, uptr ClassId, + ScopedString *Str) REQUIRES(Sci->Mutex) { const uptr BlockSize = getSizeByClassId(ClassId); + const uptr First = Sci->MinRegionIndex; + const uptr Last = Sci->MaxRegionIndex; + const uptr Base = First * RegionSize; + const uptr NumberOfRegions = Last - First + 1U; + auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); + return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; + }; + + FragmentationRecorder Recorder; + if (!Sci->FreeListInfo.BlockList.empty()) { + PageReleaseContext Context = + markFreeBlocks(Sci, ClassId, BlockSize, Base, NumberOfRegions, + ReleaseToOS::ForceAll); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + } + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Sci->AllocatedUser / BlockSize; + const uptr InUseBlocks = + Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks; + uptr AllocatedPagesCount = 0; + if (TotalBlocks != 0U) { + for (uptr I = 0; I < NumberOfRegions; ++I) { + if (SkipRegion(I)) + continue; + AllocatedPagesCount += RegionSize / PageSize; + } + + DCHECK_NE(AllocatedPagesCount, 0U); + } + + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUsePages * PageSize, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, 
TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); + } + + NOINLINE uptr releaseToOSMaybe(SizeClassInfo *Sci, uptr ClassId, + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Sci->Mutex) { + const uptr BlockSize = getSizeByClassId(ClassId); - DCHECK_GE(Sci->Stats.PoppedBlocks, Sci->Stats.PushedBlocks); + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); const uptr BytesInFreeList = Sci->AllocatedUser - - (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = - (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. + (Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks) * + BlockSize; - // Releasing smaller blocks is expensive, so we want to make sure that a - // significant amount of bytes are free, and that there has been a good - // amount of batches pushed to the freelist before attempting to release. - if (BlockSize < PageSize / 16U) { - if (!Force && BytesPushed < Sci->AllocatedUser / 16U) - return 0; - // We want 8x% to 9x% free bytes (the larger the block, the lower the %). - if ((BytesInFreeList * 100U) / Sci->AllocatedUser < - (100U - 1U - BlockSize / 16U)) - return 0; - } + if (UNLIKELY(BytesInFreeList == 0)) + return 0; - if (!Force) { - const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); - if (IntervalMs < 0) - return 0; - if (Sci->ReleaseInfo.LastReleaseAtNs + - static_cast(IntervalMs) * 1000000 > - getMonotonicTime()) { - return 0; // Memory was returned recently. - } + // ====================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. + // ====================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Sci, BlockSize, BytesInFreeList, + ReleaseType)) { + return 0; } const uptr First = Sci->MinRegionIndex; @@ -469,37 +972,197 @@ template class SizeClassAllocator32 { uptr TotalReleasedBytes = 0; const uptr Base = First * RegionSize; const uptr NumberOfRegions = Last - First + 1U; + + // ==================================================================== // + // 2. Mark the free blocks and we can tell which pages are in-use by + // querying `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = markFreeBlocks(Sci, ClassId, BlockSize, Base, + NumberOfRegions, ReleaseType); + if (!Context.hasBlockMarked()) + return 0; + + // ==================================================================== // + // 3. Release the unused physical pages back to the OS. 
+ // ==================================================================== // ReleaseRecorder Recorder(Base); auto SkipRegion = [this, First, ClassId](uptr RegionIndex) { + ScopedLock L(ByteMapMutex); return (PossibleRegions[First + RegionIndex] - 1U) != ClassId; }; - auto DecompactPtr = [](CompactPtrT CompactPtr) { - return reinterpret_cast(CompactPtr); - }; - releaseFreeMemoryToOS(Sci->FreeList, RegionSize, NumberOfRegions, BlockSize, - &Recorder, DecompactPtr, SkipRegion); + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedRangesCount() > 0) { - Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks; + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); Sci->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); TotalReleasedBytes += Sci->ReleaseInfo.LastReleasedBytes; } - Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); + Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); return TotalReleasedBytes; } + bool hasChanceToReleasePages(SizeClassInfo *Sci, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + DCHECK_GE(Sci->FreeListInfo.PoppedBlocks, Sci->FreeListInfo.PushedBlocks); + const uptr PageSize = getPageSizeCached(); + + if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) + Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). + const uptr PushedBytesDelta = + BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + return false; + + // Releasing smaller blocks is expensive, so we want to make sure that a + // significant amount of bytes are free, and that there has been a good + // amount of batches pushed to the freelist before attempting to release. + if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal) + if (PushedBytesDelta < Sci->AllocatedUser / 16U) + return false; + + if (ReleaseType == ReleaseToOS::Normal) { + const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); + if (IntervalMs < 0) + return false; + + // The constant 8 here is selected from profiling some apps and the number + // of unreleased pages in the large size classes is around 16 pages or + // more. Choose half of it as a heuristic and which also avoids page + // release every time for every pushBlocks() attempt by large blocks. + const bool ByPassReleaseInterval = + isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize; + if (!ByPassReleaseInterval) { + if (Sci->ReleaseInfo.LastReleaseAtNs + + static_cast(IntervalMs) * 1000000 > + getMonotonicTimeFast()) { + // Memory was returned recently. 
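// Illustrative sketch (editor's example, not from the Scudo sources): the
// throttling decision hasChanceToReleasePages() makes for periodic
// (ReleaseToOS::Normal) release. A release only proceeds if enough bytes were
// freed since the last checkpoint and the configured interval has elapsed,
// with a bypass for large blocks that free many pages at once. The thresholds
// mirror the ones above, but this standalone function takes plain parameters.
#include <cstdint>

bool shouldTryRelease(std::uint64_t PushedBytesDelta,
                      std::uint64_t AllocatedUser, std::uint64_t BlockSize,
                      std::uint64_t PageSize, std::uint64_t NowNs,
                      std::uint64_t LastReleaseNs, std::int64_t IntervalMs) {
  if (PushedBytesDelta < PageSize)
    return false;  // not enough was freed since the last checkpoint
  if (BlockSize < PageSize / 16 && PushedBytesDelta < AllocatedUser / 16)
    return false;  // small blocks are expensive to scan; wait for more
  if (IntervalMs < 0)
    return false;  // periodic release is disabled
  if (BlockSize > PageSize && PushedBytesDelta > 8 * PageSize)
    return true;   // large-block bypass: big enough win to skip the timer
  return NowNs >= LastReleaseNs +
                      static_cast<std::uint64_t>(IntervalMs) * 1000000;
}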
+ return false; + } + } + } // if (ReleaseType == ReleaseToOS::Normal) + + return true; + } + + PageReleaseContext markFreeBlocks(SizeClassInfo *Sci, const uptr ClassId, + const uptr BlockSize, const uptr Base, + const uptr NumberOfRegions, + ReleaseToOS ReleaseType) + REQUIRES(Sci->Mutex) { + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr CurGroupBase = + compactPtrGroupBase(compactPtr(ClassId, Sci->CurrentRegion)); + + PageReleaseContext Context(BlockSize, NumberOfRegions, + /*ReleaseSize=*/RegionSize); + + auto DecompactPtr = [](CompactPtrT CompactPtr) { + return reinterpret_cast(CompactPtr); + }; + for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) { + const uptr GroupBase = decompactGroupBase(BG.CompactPtrGroupBase); + // The `GroupSize` may not be divided by `BlockSize`, which means there is + // an unused space at the end of Region. Exclude that space to avoid + // unused page map entry. + uptr AllocatedGroupSize = GroupBase == CurGroupBase + ? Sci->CurrentRegionAllocated + : roundDownSlow(GroupSize, BlockSize); + if (AllocatedGroupSize == 0) + continue; + + // TransferBatches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (ReleaseType != ReleaseToOS::ForceAll) { + if (BytesInBG <= BG.BytesInBGAtLastCheckpoint) { + BG.BytesInBGAtLastCheckpoint = BytesInBG; + continue; + } + + const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint; + if (PushedBytesDelta < PageSize) + continue; + + // Given the randomness property, we try to release the pages only if + // the bytes used by free blocks exceed certain proportion of allocated + // spaces. + if (isSmallBlock(BlockSize) && (BytesInBG * 100U) / AllocatedGroupSize < + (100U - 1U - BlockSize / 16U)) { + continue; + } + } + + // TODO: Consider updating this after page release if `ReleaseRecorder` + // can tell the released bytes in each group. + BG.BytesInBGAtLastCheckpoint = BytesInBG; + + const uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; + const uptr RegionIndex = (GroupBase - Base) / RegionSize; + + if (NumBlocks == MaxContainedBlocks) { + for (const auto &It : BG.Batches) + for (u16 I = 0; I < It.getCount(); ++I) + DCHECK_EQ(compactPtrGroupBase(It.get(I)), BG.CompactPtrGroupBase); + + const uptr To = GroupBase + AllocatedGroupSize; + Context.markRangeAsAllCounted(GroupBase, To, GroupBase, RegionIndex, + AllocatedGroupSize); + } else { + DCHECK_LT(NumBlocks, MaxContainedBlocks); + + // Note that we don't always visit blocks in each BatchGroup so that we + // may miss the chance of releasing certain pages that cross + // BatchGroups. + Context.markFreeBlocksInRegion(BG.Batches, DecompactPtr, GroupBase, + RegionIndex, AllocatedGroupSize, + /*MayContainLastBlockInRegion=*/true); + } + + // We may not be able to do the page release In a rare case that we may + // fail on PageMap allocation. + if (UNLIKELY(!Context.hasBlockMarked())) + break; + } + + return Context; + } + SizeClassInfo SizeClassInfoArray[NumClasses] = {}; + HybridMutex ByteMapMutex; // Track the regions in use, 0 is unused, otherwise store ClassId + 1. 
- ByteMap PossibleRegions = {}; + ByteMap PossibleRegions GUARDED_BY(ByteMapMutex) = {}; atomic_s32 ReleaseToOsIntervalMs = {}; // Unless several threads request regions simultaneously from different size // classes, the stash rarely contains more than 1 entry. static constexpr uptr MaxStashedRegions = 4; HybridMutex RegionsStashMutex; - uptr NumberOfStashedRegions = 0; - uptr RegionsStash[MaxStashedRegions] = {}; + uptr NumberOfStashedRegions GUARDED_BY(RegionsStashMutex) = 0; + uptr RegionsStash[MaxStashedRegions] GUARDED_BY(RegionsStashMutex) = {}; }; } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/primary64.h b/third_party/llvm-scudo-standalone/primary64.h index 13420bf..9a642d2 100644 --- a/third_party/llvm-scudo-standalone/primary64.h +++ b/third_party/llvm-scudo-standalone/primary64.h @@ -9,15 +9,20 @@ #ifndef SCUDO_PRIMARY64_H_ #define SCUDO_PRIMARY64_H_ +#include "allocator_common.h" #include "bytemap.h" #include "common.h" #include "list.h" #include "local_cache.h" +#include "mem_map.h" #include "memtag.h" #include "options.h" #include "release.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" + +#include "condition_variable.h" namespace scudo { @@ -43,99 +48,309 @@ namespace scudo { template class SizeClassAllocator64 { public: - typedef typename Config::PrimaryCompactPtrT CompactPtrT; - static const uptr CompactPtrScale = Config::PrimaryCompactPtrScale; - typedef typename Config::SizeClassMap SizeClassMap; + typedef typename Config::Primary::CompactPtrT CompactPtrT; + typedef typename Config::Primary::SizeClassMap SizeClassMap; + typedef typename ConditionVariableState< + typename Config::Primary>::ConditionVariableT ConditionVariableT; + static const uptr CompactPtrScale = Config::Primary::CompactPtrScale; + static const uptr RegionSizeLog = Config::Primary::RegionSizeLog; + static const uptr GroupSizeLog = Config::Primary::GroupSizeLog; + static_assert(RegionSizeLog >= GroupSizeLog, + "Group size shouldn't be greater than the region size"); + static const uptr GroupScale = GroupSizeLog - CompactPtrScale; typedef SizeClassAllocator64 ThisT; typedef SizeClassAllocatorLocalCache CacheT; - typedef typename CacheT::TransferBatch TransferBatch; + typedef TransferBatch TransferBatchT; + typedef BatchGroup BatchGroupT; + + static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT), + "BatchGroupT uses the same class size as TransferBatchT"); static uptr getSizeByClassId(uptr ClassId) { return (ClassId == SizeClassMap::BatchClassId) - ? roundUpTo(sizeof(TransferBatch), 1U << CompactPtrScale) + ? roundUp(sizeof(TransferBatchT), 1U << CompactPtrScale) : SizeClassMap::getSizeByClassId(ClassId); } static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; } - void init(s32 ReleaseToOsInterval) { + static bool conditionVariableEnabled() { + return ConditionVariableState::enabled(); + } + + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); - DCHECK_EQ(PrimaryBase, 0U); + + const uptr PageSize = getPageSizeCached(); + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PagesInGroup = GroupSize / PageSize; + const uptr MinSizeClass = getSizeByClassId(1); + // When trying to release pages back to memory, visiting smaller size + // classes is expensive. Therefore, we only try to release smaller size + // classes when the amount of free blocks goes over a certain threshold (See + // the comment in releaseToOSMaybe() for more details). 
For example, for + // size class 32, we only do the release when the size of free blocks is + // greater than 97% of pages in a group. However, this may introduce another + // issue that if the number of free blocks is bouncing between 97% ~ 100%. + // Which means we may try many page releases but only release very few of + // them (less than 3% in a group). Even though we have + // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these + // calls but it will be better to have another guard to mitigate this issue. + // + // Here we add another constraint on the minimum size requirement. The + // constraint is determined by the size of in-use blocks in the minimal size + // class. Take size class 32 as an example, + // + // +- one memory group -+ + // +----------------------+------+ + // | 97% of free blocks | | + // +----------------------+------+ + // \ / + // 3% in-use blocks + // + // * The release size threshold is 97%. + // + // The 3% size in a group is about 7 pages. For two consecutive + // releaseToOSMaybe(), we require the difference between `PushedBlocks` + // should be greater than 7 pages. This mitigates the page releasing + // thrashing which is caused by memory usage bouncing around the threshold. + // The smallest size class takes longest time to do the page release so we + // use its size of in-use blocks as a heuristic. + SmallerBlockReleasePageDelta = + PagesInGroup * (1 + MinSizeClass / 16U) / 100; + // Reserve the space required for the Primary. - PrimaryBase = reinterpret_cast( - map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data)); + CHECK(ReservedMemory.create(/*Addr=*/0U, PrimarySize, + "scudo:primary_reserve")); + PrimaryBase = ReservedMemory.getBase(); + DCHECK_NE(PrimaryBase, 0U); u32 Seed; - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) Seed = static_cast(Time ^ (PrimaryBase >> 12)); - const uptr PageSize = getPageSizeCached(); + for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); + // The actual start of a region is offset by a random number of pages // when PrimaryEnableRandomOffset is set. - Region->RegionBeg = getRegionBaseByClassId(I) + - (Config::PrimaryEnableRandomOffset + Region->RegionBeg = (PrimaryBase + (I << RegionSizeLog)) + + (Config::Primary::EnableRandomOffset ? ((getRandomModN(&Seed, 16) + 1) * PageSize) : 0); Region->RandState = getRandomU32(&Seed); + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. + if (isSmallBlock(getSizeByClassId(I))) + Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; + else + Region->TryReleaseThreshold = PageSize; Region->ReleaseInfo.LastReleaseAtNs = Time; + + Region->MemMapInfo.MemMap = ReservedMemory.dispatch( + PrimaryBase + (I << RegionSizeLog), RegionSize); + CHECK(Region->MemMapInfo.MemMap.isAllocated()); } + shuffle(RegionInfoArray, NumClasses, &Seed); + + // The binding should be done after region shuffling so that it won't bind + // the FLLock from the wrong region. 
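// Illustrative, standalone sketch (not part of the upstream patch) of the
// SmallerBlockReleasePageDelta arithmetic described above. The 4 KiB page
// size and 1 MiB group size are assumptions chosen for the example only.
#include <cstdint>

constexpr std::uint64_t
smallerBlockReleasePageDelta(std::uint64_t PagesInGroup,
                             std::uint64_t MinSizeClass) {
  // Mirrors `PagesInGroup * (1 + MinSizeClass / 16U) / 100`: roughly the
  // number of pages covered by the in-use tail (~3%) of the smallest class.
  return PagesInGroup * (1 + MinSizeClass / 16) / 100;
}

// 1 MiB group / 4 KiB page = 256 pages; with size class 32 the release
// threshold is ~97%, so the remaining ~3% is about 7 pages, matching the
// "difference greater than 7 pages" guard described in the comment above.
static_assert(smallerBlockReleasePageDelta(256, 32) == 7,
              "worked example for the heuristic");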
+ for (uptr I = 0; I < NumClasses; I++) + getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock); + setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void unmapTestOnly() { + void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); *Region = {}; } - unmap(reinterpret_cast(PrimaryBase), PrimarySize, UNMAP_ALL, &Data); + if (PrimaryBase) + ReservedMemory.release(); PrimaryBase = 0U; } - TransferBatch *popBatch(CacheT *C, uptr ClassId) { + // When all blocks are freed, it has to be the same size as `AllocatedUser`. + void verifyAllBlocksAreReleasedTestOnly() { + // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass. + uptr BatchClassUsedInFreeLists = 0; + for (uptr I = 0; I < NumClasses; I++) { + // We have to count BatchClassUsedInFreeLists in other regions first. + if (I == SizeClassMap::BatchClassId) + continue; + RegionInfo *Region = getRegionInfo(I); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(I); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`. + BatchClassUsedInFreeLists += BG.Batches.size() + 1; + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } + + DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_EQ(Region->FreeListInfo.PushedBlocks, + Region->FreeListInfo.PoppedBlocks); + } + + RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId); + ScopedLock ML(Region->MMLock); + ScopedLock FL(Region->FLLock); + const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId); + uptr TotalBlocks = 0; + for (BatchGroupT &BG : Region->FreeListInfo.BlockList) { + if (LIKELY(!BG.Batches.empty())) { + for (const auto &It : BG.Batches) + TotalBlocks += It.getCount(); + } else { + // `BatchGroup` with empty freelist doesn't have `TransferBatch` record + // itself. + ++TotalBlocks; + } + } + DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists, + Region->MemMapInfo.AllocatedUser / BlockSize); + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); + const uptr BlocksInUse = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists); + } + + // Note that the `MaxBlockCount` will be used when we support arbitrary blocks + // count. Now it's the same as the number of blocks stored in the + // `TransferBatch`. 
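// Sketch (not part of the patch) of what the popBlocks() method defined just
// below does with the batch it pops: move the batch's compact pointers into
// the caller's array and, unless the request is for the BatchClass itself,
// hand the TransferBatch block back for reuse. Types here are stand-ins.
#include <cstdint>
#include <vector>

struct BatchSketch {
  std::vector<std::uint32_t> Blocks; // compact pointers, bounded in scudo
};

// Returns how many blocks were handed out; `RecycleBatchBlock` models the
// C->deallocate(BatchClassId, B) call made for non-BatchClass requests.
inline std::uint16_t drainBatch(BatchSketch &B, std::uint32_t *ToArray,
                                bool IsBatchClass,
                                void (*RecycleBatchBlock)(BatchSketch &)) {
  const std::uint16_t Count = static_cast<std::uint16_t>(B.Blocks.size());
  for (std::uint16_t I = 0; I < Count; ++I)
    ToArray[I] = B.Blocks[I];
  B.Blocks.clear();
  if (!IsBatchClass)
    RecycleBatchBlock(B);
  return Count;
}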
+ u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray, + UNUSED const u16 MaxBlockCount) { + TransferBatchT *B = popBatch(C, ClassId); + if (!B) + return 0; + + const u16 Count = B->getCount(); + DCHECK_GT(Count, 0U); + B->moveToArray(ToArray); + + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, B); + + return Count; + } + + TransferBatchT *popBatch(CacheT *C, uptr ClassId) { DCHECK_LT(ClassId, NumClasses); RegionInfo *Region = getRegionInfo(ClassId); - ScopedLock L(Region->Mutex); - TransferBatch *B = Region->FreeList.front(); - if (B) { - Region->FreeList.pop_front(); + + { + ScopedLock L(Region->FLLock); + TransferBatchT *B = popBatchImpl(C, ClassId, Region); + if (LIKELY(B)) + return B; + } + + bool ReportRegionExhausted = false; + TransferBatchT *B = nullptr; + + if (conditionVariableEnabled()) { + B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted); } else { - B = populateFreeList(C, ClassId, Region); - if (UNLIKELY(!B)) - return nullptr; + while (true) { + // When two threads compete for `Region->MMLock`, we only want one of + // them to call populateFreeListAndPopBatch(). To avoid both of them + // doing that, always check the freelist before mapping new pages. + ScopedLock ML(Region->MMLock); + { + ScopedLock FL(Region->FLLock); + if ((B = popBatchImpl(C, ClassId, Region))) + break; + } + + const bool RegionIsExhausted = Region->Exhausted; + if (!RegionIsExhausted) + B = populateFreeListAndPopBatch(C, ClassId, Region); + ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; + break; + } + } + + if (UNLIKELY(ReportRegionExhausted)) { + Printf("Can't populate more pages for size class %zu.\n", + getSizeByClassId(ClassId)); + + // Theoretically, BatchClass shouldn't be used up. Abort immediately when + // it happens. + if (ClassId == SizeClassMap::BatchClassId) + reportOutOfBatchClass(); } - DCHECK_GT(B->getCount(), 0); - Region->Stats.PoppedBlocks += B->getCount(); + return B; } - void pushBatch(uptr ClassId, TransferBatch *B) { - DCHECK_GT(B->getCount(), 0); + // Push the array of free blocks to the designated batch group. + void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) { + DCHECK_LT(ClassId, NumClasses); + DCHECK_GT(Size, 0); + RegionInfo *Region = getRegionInfo(ClassId); - ScopedLock L(Region->Mutex); - Region->FreeList.push_front(B); - Region->Stats.PushedBlocks += B->getCount(); - if (ClassId != SizeClassMap::BatchClassId) - releaseToOSMaybe(Region, ClassId); + if (ClassId == SizeClassMap::BatchClassId) { + ScopedLock L(Region->FLLock); + pushBatchClassBlocks(Region, Array, Size); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); + return; + } + + // TODO(chiahungduan): Consider not doing grouping if the group size is not + // greater than the block size with a certain scale. + + bool SameGroup = true; + if (GroupSizeLog < RegionSizeLog) { + // Sort the blocks so that blocks belonging to the same group can be + // pushed together. 
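// Standalone sketch (not part of the patch) of the group-ordering step
// performed by the insertion sort that follows: order the array by the
// memory group of each block and detect whether a single group covers all
// of them. `GroupScale` and the compact-pointer layout are simplified
// assumptions for the example.
#include <cstdint>

using CompactPtr = std::uint32_t;
constexpr std::uint32_t GroupScale = 12; // assumed group granularity

constexpr std::uint32_t groupOf(CompactPtr P) { return P >> GroupScale; }

// Sorts Array[0..Size) by group and reports whether all blocks share one
// group, so the caller can skip per-block group checks (SameGroup == true).
inline bool sortByGroup(CompactPtr *Array, std::uint32_t Size) {
  bool SameGroup = true;
  for (std::uint32_t I = 1; I < Size; ++I) {
    if (groupOf(Array[I - 1]) != groupOf(Array[I]))
      SameGroup = false;
    CompactPtr Cur = Array[I];
    std::uint32_t J = I;
    while (J > 0 && groupOf(Cur) < groupOf(Array[J - 1])) {
      Array[J] = Array[J - 1];
      --J;
    }
    Array[J] = Cur;
  }
  return SameGroup;
}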
+ for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) + SameGroup = false; + CompactPtrT Cur = Array[I]; + u32 J = I; + while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) { + Array[J] = Array[J - 1]; + --J; + } + Array[J] = Cur; + } + } + + { + ScopedLock L(Region->FLLock); + pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup); + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); + } } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // The BatchClassId must be locked last since other classes can use it. for (sptr I = static_cast(NumClasses) - 1; I >= 0; I--) { if (static_cast(I) == SizeClassMap::BatchClassId) continue; - getRegionInfo(static_cast(I))->Mutex.lock(); + getRegionInfo(static_cast(I))->MMLock.lock(); + getRegionInfo(static_cast(I))->FLLock.lock(); } - getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock(); + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock(); + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock(); } - void enable() { - getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock(); + void enable() NO_THREAD_SAFETY_ANALYSIS { + getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock(); + getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock(); for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; - getRegionInfo(I)->Mutex.unlock(); + getRegionInfo(I)->FLLock.unlock(); + getRegionInfo(I)->MMLock.unlock(); } } @@ -143,10 +358,15 @@ template class SizeClassAllocator64 { for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; - const RegionInfo *Region = getRegionInfo(I); + RegionInfo *Region = getRegionInfo(I); + // TODO: The call of `iterateOverBlocks` requires disabling + // SizeClassAllocator64. We may consider locking each region on demand + // only. 
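// Sketch (not part of the patch) of the lock-ordering rule used by the
// disable()/enable() pair above: every per-class lock is taken before the
// BatchClass lock, and enable() releases in exactly the reverse order, so
// all threads agree on a single acquisition order and cannot deadlock.
// The class count and BatchClass index below are assumed example values.
#include <cstddef>
#include <mutex>

constexpr std::size_t kNumClasses = 4;
constexpr std::size_t kBatchClassId = 0;

inline void disableAll(std::mutex (&Locks)[kNumClasses]) {
  for (std::size_t I = kNumClasses; I-- > 0;)
    if (I != kBatchClassId)
      Locks[I].lock();
  Locks[kBatchClassId].lock(); // BatchClass is always locked last.
}

inline void enableAll(std::mutex (&Locks)[kNumClasses]) {
  Locks[kBatchClassId].unlock(); // ...and released first.
  for (std::size_t I = 0; I < kNumClasses; ++I)
    if (I != kBatchClassId)
      Locks[I].unlock();
}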
+ Region->FLLock.assertHeld(); + Region->MMLock.assertHeld(); const uptr BlockSize = getSizeByClassId(I); const uptr From = Region->RegionBeg; - const uptr To = From + Region->AllocatedUser; + const uptr To = From + Region->MemMapInfo.AllocatedUser; for (uptr Block = From; Block < To; Block += BlockSize) Callback(Block); } @@ -159,25 +379,46 @@ template class SizeClassAllocator64 { uptr PushedBlocks = 0; for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); - if (Region->MappedUser) - TotalMapped += Region->MappedUser; - PoppedBlocks += Region->Stats.PoppedBlocks; - PushedBlocks += Region->Stats.PushedBlocks; + { + ScopedLock L(Region->MMLock); + TotalMapped += Region->MemMapInfo.MappedUser; + } + { + ScopedLock L(Region->FLLock); + PoppedBlocks += Region->FreeListInfo.PoppedBlocks; + PushedBlocks += Region->FreeListInfo.PushedBlocks; + } } - Str->append("Stats: SizeClassAllocator64: %zuM mapped (%zuM rss) in %zu " + Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu " "allocations; remains %zu\n", - TotalMapped >> 20, 0, PoppedBlocks, + TotalMapped >> 20, 0U, PoppedBlocks, PoppedBlocks - PushedBlocks); - for (uptr I = 0; I < NumClasses; I++) - getStats(Str, I, 0); + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L1(Region->MMLock); + ScopedLock L2(Region->FLLock); + getStats(Str, I, Region); + } + } + + void getFragmentationInfo(ScopedString *Str) { + Str->append( + "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n", + getPageSizeCached()); + + for (uptr I = 1; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + ScopedLock L(Region->MMLock); + getRegionFragmentationInfo(Region, I, Str); + } } bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = Max( - Min(static_cast(Value), Config::PrimaryMaxReleaseToOsIntervalMs), - Config::PrimaryMinReleaseToOsIntervalMs); + const s32 Interval = Max(Min(static_cast(Value), + Config::Primary::MaxReleaseToOsIntervalMs), + Config::Primary::MinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } @@ -185,14 +426,27 @@ template class SizeClassAllocator64 { return true; } - uptr releaseToOS() { + uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) { + RegionInfo *Region = getRegionInfo(ClassId); + // Note that the tryLock() may fail spuriously, given that it should rarely + // happen and page releasing is fine to skip, we don't take certain + // approaches to ensure one page release is done. + if (Region->MMLock.tryLock()) { + uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType); + Region->MMLock.unlock(); + return BytesReleased; + } + return 0; + } + + uptr releaseToOS(ReleaseToOS ReleaseType) { uptr TotalReleasedBytes = 0; for (uptr I = 0; I < NumClasses; I++) { if (I == SizeClassMap::BatchClassId) continue; RegionInfo *Region = getRegionInfo(I); - ScopedLock L(Region->Mutex); - TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true); + ScopedLock L(Region->MMLock); + TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType); } return TotalReleasedBytes; } @@ -204,9 +458,6 @@ template class SizeClassAllocator64 { static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); } uptr getCompactPtrBaseByClassId(uptr ClassId) { - // If we are not compacting pointers, base everything off of 0. 
- if (sizeof(CompactPtrT) == sizeof(uptr) && CompactPtrScale == 0) - return 0; return getRegionInfo(ClassId)->RegionBeg; } @@ -221,16 +472,24 @@ template class SizeClassAllocator64 { decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr)); } - static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) { + static BlockInfo findNearestBlock(const char *RegionInfoData, + uptr Ptr) NO_THREAD_SAFETY_ANALYSIS { const RegionInfo *RegionInfoArray = reinterpret_cast(RegionInfoData); + uptr ClassId; uptr MinDistance = -1UL; for (uptr I = 0; I != NumClasses; ++I) { if (I == SizeClassMap::BatchClassId) continue; uptr Begin = RegionInfoArray[I].RegionBeg; - uptr End = Begin + RegionInfoArray[I].AllocatedUser; + // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock. + // However, the RegionInfoData is passed with const qualifier and lock the + // mutex requires modifying RegionInfoData, which means we need to remove + // the const qualifier. This may lead to another undefined behavior (The + // first one is accessing `AllocatedUser` without locking. It's better to + // pass `RegionInfoData` as `void *` then we can lock the mutex properly. + uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser; if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I)) continue; uptr RegionDistance; @@ -252,7 +511,8 @@ template class SizeClassAllocator64 { BlockInfo B = {}; if (MinDistance <= 8192) { B.RegionBegin = RegionInfoArray[ClassId].RegionBeg; - B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser; + B.RegionEnd = + B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser; B.BlockSize = SizeClassMap::getSizeByClassId(ClassId); B.BlockBegin = B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) * @@ -268,37 +528,51 @@ template class SizeClassAllocator64 { AtomicOptions Options; private: - static const uptr RegionSize = 1UL << Config::PrimaryRegionSizeLog; + static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; static const uptr PrimarySize = RegionSize * NumClasses; - static const uptr MapSizeIncrement = Config::PrimaryMapSizeIncrement; + static const uptr MapSizeIncrement = Config::Primary::MapSizeIncrement; // Fill at most this number of batches from the newly map'd memory. static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U; - struct RegionStats { - uptr PoppedBlocks; - uptr PushedBlocks; - }; - struct ReleaseToOsInfo { - uptr PushedBlocksAtLastRelease; + uptr BytesInFreeListAtLastCheckpoint; uptr RangesReleased; uptr LastReleasedBytes; u64 LastReleaseAtNs; }; + struct BlocksInfo { + SinglyLinkedList BlockList = {}; + uptr PoppedBlocks = 0; + uptr PushedBlocks = 0; + }; + + struct PagesInfo { + MemMapT MemMap = {}; + // Bytes mapped for user memory. + uptr MappedUser = 0; + // Bytes allocated for user memory. + uptr AllocatedUser = 0; + }; + struct UnpaddedRegionInfo { - HybridMutex Mutex; - SinglyLinkedList FreeList; + // Mutex for operations on freelist + HybridMutex FLLock; + ConditionVariableT FLLockCV GUARDED_BY(FLLock); + // Mutex for memmap operations + HybridMutex MMLock ACQUIRED_BEFORE(FLLock); + // `RegionBeg` is initialized before thread creation and won't be changed. uptr RegionBeg = 0; - RegionStats Stats = {}; - u32 RandState = 0; - uptr MappedUser = 0; // Bytes mapped for user memory. - uptr AllocatedUser = 0; // Bytes allocated for user memory. 
- MapPlatformData Data = {}; - ReleaseToOsInfo ReleaseInfo = {}; - bool Exhausted = false; + u32 RandState GUARDED_BY(MMLock) = 0; + BlocksInfo FreeListInfo GUARDED_BY(FLLock); + PagesInfo MemMapInfo GUARDED_BY(MMLock); + // The minimum size of pushed blocks to trigger page release. + uptr TryReleaseThreshold GUARDED_BY(MMLock) = 0; + ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {}; + bool Exhausted GUARDED_BY(MMLock) = false; + bool isPopulatingFreeList GUARDED_BY(FLLock) = false; }; struct RegionInfo : UnpaddedRegionInfo { char Padding[SCUDO_CACHE_LINE_SIZE - @@ -306,18 +580,15 @@ template class SizeClassAllocator64 { }; static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); - uptr PrimaryBase = 0; - MapPlatformData Data = {}; - atomic_s32 ReleaseToOsIntervalMs = {}; - alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; - RegionInfo *getRegionInfo(uptr ClassId) { DCHECK_LT(ClassId, NumClasses); return &RegionInfoArray[ClassId]; } - uptr getRegionBaseByClassId(uptr ClassId) const { - return PrimaryBase + (ClassId << Config::PrimaryRegionSizeLog); + uptr getRegionBaseByClassId(uptr ClassId) { + return roundDown(getRegionInfo(ClassId)->RegionBeg - PrimaryBase, + RegionSize) + + PrimaryBase; } static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { @@ -328,160 +599,1088 @@ template class SizeClassAllocator64 { return Base + (static_cast(CompactPtr) << CompactPtrScale); } - NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId, - RegionInfo *Region) { + static uptr compactPtrGroup(CompactPtrT CompactPtr) { + const uptr Mask = (static_cast(1) << GroupScale) - 1; + return static_cast(CompactPtr) & ~Mask; + } + static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) { + DCHECK_EQ(CompactPtrGroupBase % (static_cast(1) << (GroupScale)), 0U); + return Base + (CompactPtrGroupBase << CompactPtrScale); + } + + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; + } + + ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize > PageSize; + } + + void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) + REQUIRES(Region->FLLock) { + DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); + + // Free blocks are recorded by TransferBatch in freelist for all + // size-classes. In addition, TransferBatch is allocated from BatchClassId. + // In order not to use additional block to record the free blocks in + // BatchClassId, they are self-contained. I.e., A TransferBatch records the + // block address of itself. See the figure below: + // + // TransferBatch at 0xABCD + // +----------------------------+ + // | Free blocks' addr | + // | +------+------+------+ | + // | |0xABCD|... |... | | + // | +------+------+------+ | + // +----------------------------+ + // + // When we allocate all the free blocks in the TransferBatch, the block used + // by TransferBatch is also free for use. We don't need to recycle the + // TransferBatch. Note that the correctness is maintained by the invariant, + // + // The unit of each popBatch() request is entire TransferBatch. Return + // part of the blocks in a TransferBatch is invalid. + // + // This ensures that TransferBatch won't leak the address itself while it's + // still holding other valid data. + // + // Besides, BatchGroup is also allocated from BatchClassId and has its + // address recorded in the TransferBatch too. 
To maintain the correctness, + // + // The address of BatchGroup is always recorded in the last TransferBatch + // in the freelist (also imply that the freelist should only be + // updated with push_front). Once the last TransferBatch is popped, + // the block used by BatchGroup is also free for use. + // + // With this approach, the blocks used by BatchGroup and TransferBatch are + // reusable and don't need additional space for them. + + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + + if (BG == nullptr) { + // Construct `BatchGroup` on the last element. + BG = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + --Size; + BG->Batches.clear(); + // BatchClass hasn't enabled memory group. Use `0` to indicate there's no + // memory group here. + BG->CompactPtrGroupBase = 0; + // `BG` is also the block of BatchClassId. Note that this is different + // from `CreateGroup` in `pushBlocksImpl` + BG->PushedBlocks = 1; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = + CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId)); + + Region->FreeListInfo.BlockList.push_front(BG); + } + + if (UNLIKELY(Size == 0)) + return; + + // This happens under 2 cases. + // 1. just allocated a new `BatchGroup`. + // 2. Only 1 block is pushed when the freelist is empty. + if (BG->Batches.empty()) { + // Construct the `TransferBatch` on the last element. + TransferBatchT *TB = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1])); + TB->clear(); + // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are + // recorded in the TransferBatch. + TB->add(Array[Size - 1]); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(BG))); + --Size; + DCHECK_EQ(BG->PushedBlocks, 1U); + // `TB` is also the block of BatchClassId. + BG->PushedBlocks += 1; + BG->Batches.push_front(TB); + } + + TransferBatchT *CurBatch = BG->Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = reinterpret_cast( + decompactPtr(SizeClassMap::BatchClassId, Array[I])); + CurBatch->clear(); + // Self-contained + CurBatch->add(Array[I]); + ++I; + // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of + // BatchClassId. + BG->Batches.push_front(CurBatch); + UnusedSlots = static_cast(BG->MaxCachedPerBatch - 1); + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + const u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + } + + // Push the blocks to their batch group. The layout will be like, + // + // FreeListInfo.BlockList - > BG -> BG -> BG + // | | | + // v v v + // TB TB TB + // | + // v + // TB + // + // Each BlockGroup(BG) will associate with unique group id and the free blocks + // are managed by a list of TransferBatch(TB). To reduce the time of inserting + // blocks, BGs are sorted and the input `Array` are supposed to be sorted so + // that we can get better performance of maintaining sorted property. + // Use `SameGroup=true` to indicate that all blocks in the array are from the + // same group then we will skip checking the group id of each block. 
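// Sketch (not part of the patch) of the freelist shape documented above and
// maintained by pushBlocksImpl() below: a region keeps a list of batch
// groups sorted by group base, and each group keeps a list of fixed-capacity
// transfer batches. Types here are simplified stand-ins, not the scudo ones.
#include <cstdint>
#include <forward_list>
#include <utility>
#include <vector>

struct TransferBatchSketch {
  std::vector<std::uint32_t> Blocks; // compact pointers, bounded in scudo
};

struct BatchGroupSketch {
  std::uint64_t GroupBase = 0;                    // memory-group base
  std::forward_list<TransferBatchSketch> Batches; // newest batch in front
};

// Insert a group while keeping the list ordered by GroupBase, mirroring how
// pushBlocksImpl() walks Prev/Cur to find the insertion point.
inline void insertSorted(std::forward_list<BatchGroupSketch> &Groups,
                         BatchGroupSketch BG) {
  auto Prev = Groups.before_begin();
  for (auto It = Groups.begin();
       It != Groups.end() && It->GroupBase < BG.GroupBase; ++It)
    ++Prev;
  Groups.insert_after(Prev, std::move(BG));
}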
+ void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region, + CompactPtrT *Array, u32 Size, bool SameGroup = false) + REQUIRES(Region->FLLock) { + DCHECK_NE(ClassId, SizeClassMap::BatchClassId); + DCHECK_GT(Size, 0U); + + auto CreateGroup = [&](uptr CompactPtrGroupBase) { + BatchGroupT *BG = + reinterpret_cast(C->getBatchClassBlock()); + BG->Batches.clear(); + TransferBatchT *TB = + reinterpret_cast(C->getBatchClassBlock()); + TB->clear(); + + BG->CompactPtrGroupBase = CompactPtrGroupBase; + BG->Batches.push_front(TB); + BG->PushedBlocks = 0; + BG->BytesInBGAtLastCheckpoint = 0; + BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId)); + + return BG; + }; + + auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) { + SinglyLinkedList &Batches = BG->Batches; + TransferBatchT *CurBatch = Batches.front(); + DCHECK_NE(CurBatch, nullptr); + + for (u32 I = 0; I < Size;) { + DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount()); + u16 UnusedSlots = + static_cast(BG->MaxCachedPerBatch - CurBatch->getCount()); + if (UnusedSlots == 0) { + CurBatch = + reinterpret_cast(C->getBatchClassBlock()); + CurBatch->clear(); + Batches.push_front(CurBatch); + UnusedSlots = BG->MaxCachedPerBatch; + } + // `UnusedSlots` is u16 so the result will be also fit in u16. + u16 AppendSize = static_cast(Min(UnusedSlots, Size - I)); + CurBatch->appendFromArray(&Array[I], AppendSize); + I += AppendSize; + } + + BG->PushedBlocks += Size; + }; + + Region->FreeListInfo.PushedBlocks += Size; + BatchGroupT *Cur = Region->FreeListInfo.BlockList.front(); + + // In the following, `Cur` always points to the BatchGroup for blocks that + // will be pushed next. `Prev` is the element right before `Cur`. + BatchGroupT *Prev = nullptr; + + while (Cur != nullptr && + compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[0])); + if (Prev == nullptr) + Region->FreeListInfo.BlockList.push_front(Cur); + else + Region->FreeListInfo.BlockList.insert(Prev, Cur); + } + + // All the blocks are from the same group, just push without checking group + // id. + if (SameGroup) { + for (u32 I = 0; I < Size; ++I) + DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase); + + InsertBlocks(Cur, Array, Size); + return; + } + + // The blocks are sorted by group id. Determine the segment of group and + // push them to their group together. 
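// Sketch (not part of the patch) of the run-segmentation loop that follows:
// walk a group-sorted array and hand each maximal run of equal group keys to
// a per-group insert, the way pushBlocksImpl() batches its InsertBlocks()
// calls. The key function and callback are illustrative stand-ins.
#include <cstdint>
#include <functional>

inline void forEachGroupRun(
    const std::uint32_t *Array, std::uint32_t Size,
    const std::function<std::uint32_t(std::uint32_t)> &GroupOf,
    const std::function<void(const std::uint32_t *, std::uint32_t)> &Insert) {
  if (Size == 0)
    return;
  std::uint32_t Count = 1;
  for (std::uint32_t I = 1; I < Size; ++I) {
    if (GroupOf(Array[I - 1]) != GroupOf(Array[I])) {
      Insert(Array + I - Count, Count); // flush the run that just ended
      Count = 1;
    } else {
      ++Count;
    }
  }
  Insert(Array + Size - Count, Count); // flush the trailing run
}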
+ u32 Count = 1; + for (u32 I = 1; I < Size; ++I) { + if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) { + DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase); + InsertBlocks(Cur, Array + I - Count, Count); + + while (Cur != nullptr && + compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) { + Prev = Cur; + Cur = Cur->Next; + } + + if (Cur == nullptr || + compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) { + Cur = CreateGroup(compactPtrGroup(Array[I])); + DCHECK_NE(Prev, nullptr); + Region->FreeListInfo.BlockList.insert(Prev, Cur); + } + + Count = 1; + } else { + ++Count; + } + } + + InsertBlocks(Cur, Array + Size - Count, Count); + } + + TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region, + bool &ReportRegionExhausted) { + TransferBatchT *B = nullptr; + + while (true) { + // We only expect one thread doing the freelist refillment and other + // threads will be waiting for either the completion of the + // `populateFreeListAndPopBatch()` or `pushBlocks()` called by other + // threads. + bool PopulateFreeList = false; + { + ScopedLock FL(Region->FLLock); + if (!Region->isPopulatingFreeList) { + Region->isPopulatingFreeList = true; + PopulateFreeList = true; + } + } + + if (PopulateFreeList) { + ScopedLock ML(Region->MMLock); + + const bool RegionIsExhausted = Region->Exhausted; + if (!RegionIsExhausted) + B = populateFreeListAndPopBatch(C, ClassId, Region); + ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; + + { + // Before reacquiring the `FLLock`, the freelist may be used up again + // and some threads are waiting for the freelist refillment by the + // current thread. It's important to set + // `Region->isPopulatingFreeList` to false so the threads about to + // sleep will notice the status change. + ScopedLock FL(Region->FLLock); + Region->isPopulatingFreeList = false; + Region->FLLockCV.notifyAll(Region->FLLock); + } + + break; + } + + // At here, there are two preconditions to be met before waiting, + // 1. The freelist is empty. + // 2. Region->isPopulatingFreeList == true, i.e, someone is still doing + // `populateFreeListAndPopBatch()`. + // + // Note that it has the chance that freelist is empty but + // Region->isPopulatingFreeList == false because all the new populated + // blocks were used up right after the refillment. Therefore, we have to + // check if someone is still populating the freelist. + ScopedLock FL(Region->FLLock); + if (LIKELY(B = popBatchImpl(C, ClassId, Region))) + break; + + if (!Region->isPopulatingFreeList) + continue; + + // Now the freelist is empty and someone's doing the refillment. We will + // wait until anyone refills the freelist or someone finishes doing + // `populateFreeListAndPopBatch()`. The refillment can be done by + // `populateFreeListAndPopBatch()`, `pushBlocks()`, + // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`. + Region->FLLockCV.wait(Region->FLLock); + + if (LIKELY(B = popBatchImpl(C, ClassId, Region))) + break; + } + + return B; + } + + // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest + // group id will be considered first. + // + // The region mutex needs to be held while calling this method. 
+ TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->FLLock) { + if (Region->FreeListInfo.BlockList.empty()) + return nullptr; + + SinglyLinkedList &Batches = + Region->FreeListInfo.BlockList.front()->Batches; + + if (Batches.empty()) { + DCHECK_EQ(ClassId, SizeClassMap::BatchClassId); + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // Block used by `BatchGroup` is from BatchClassId. Turn the block into + // `TransferBatch` with single block. + TransferBatchT *TB = reinterpret_cast(BG); + TB->clear(); + TB->add( + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(TB))); + Region->FreeListInfo.PoppedBlocks += 1; + return TB; + } + + TransferBatchT *B = Batches.front(); + Batches.pop_front(); + DCHECK_NE(B, nullptr); + DCHECK_GT(B->getCount(), 0U); + + if (Batches.empty()) { + BatchGroupT *BG = Region->FreeListInfo.BlockList.front(); + Region->FreeListInfo.BlockList.pop_front(); + + // We don't keep BatchGroup with zero blocks to avoid empty-checking while + // allocating. Note that block used by constructing BatchGroup is recorded + // as free blocks in the last element of BatchGroup::Batches. Which means, + // once we pop the last TransferBatch, the block is implicitly + // deallocated. + if (ClassId != SizeClassMap::BatchClassId) + C->deallocate(SizeClassMap::BatchClassId, BG); + } + + Region->FreeListInfo.PoppedBlocks += B->getCount(); + + return B; + } + + // Refill the freelist and return one batch. + NOINLINE TransferBatchT *populateFreeListAndPopBatch(CacheT *C, uptr ClassId, + RegionInfo *Region) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { const uptr Size = getSizeByClassId(ClassId); - const u32 MaxCount = TransferBatch::getMaxCached(Size); + const u16 MaxCount = CacheT::getMaxCached(Size); const uptr RegionBeg = Region->RegionBeg; - const uptr MappedUser = Region->MappedUser; - const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size; + const uptr MappedUser = Region->MemMapInfo.MappedUser; + const uptr TotalUserBytes = + Region->MemMapInfo.AllocatedUser + MaxCount * Size; // Map more space for blocks, if necessary. if (TotalUserBytes > MappedUser) { // Do the mmap for the user memory. const uptr MapSize = - roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement); + roundUp(TotalUserBytes - MappedUser, MapSizeIncrement); const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId); if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) { - if (!Region->Exhausted) { - Region->Exhausted = true; - ScopedString Str; - getStats(&Str); - Str.append( - "Scudo OOM: The process has exhausted %zuM for size class %zu.\n", - RegionSize >> 20, Size); - Str.output(); - } + Region->Exhausted = true; return nullptr; } - if (MappedUser == 0) - Region->Data = Data; - if (UNLIKELY(!map( - reinterpret_cast(RegionBeg + MappedUser), MapSize, - "scudo:primary", + + if (UNLIKELY(!Region->MemMapInfo.MemMap.remap( + RegionBeg + MappedUser, MapSize, "scudo:primary", MAP_ALLOWNOMEM | MAP_RESIZABLE | - (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0), - &Region->Data))) + (useMemoryTagging(Options.load()) ? 
MAP_MEMTAG + : 0)))) { return nullptr; - Region->MappedUser += MapSize; + } + Region->MemMapInfo.MappedUser += MapSize; C->getStats().add(StatMapped, MapSize); } - const u32 NumberOfBlocks = Min( - MaxNumBatches * MaxCount, - static_cast((Region->MappedUser - Region->AllocatedUser) / Size)); + const u32 NumberOfBlocks = + Min(MaxNumBatches * MaxCount, + static_cast((Region->MemMapInfo.MappedUser - + Region->MemMapInfo.AllocatedUser) / + Size)); DCHECK_GT(NumberOfBlocks, 0); constexpr u32 ShuffleArraySize = - MaxNumBatches * TransferBatch::MaxNumCached; + MaxNumBatches * TransferBatchT::MaxNumCached; CompactPtrT ShuffleArray[ShuffleArraySize]; DCHECK_LE(NumberOfBlocks, ShuffleArraySize); const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); - uptr P = RegionBeg + Region->AllocatedUser; + uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser; for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P); - // No need to shuffle the batches size class. - if (ClassId != SizeClassMap::BatchClassId) - shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState); - for (u32 I = 0; I < NumberOfBlocks;) { - TransferBatch *B = - C->createBatch(ClassId, reinterpret_cast(decompactPtrInternal( - CompactPtrBase, ShuffleArray[I]))); - if (UNLIKELY(!B)) - return nullptr; - const u32 N = Min(MaxCount, NumberOfBlocks - I); - B->setFromArray(&ShuffleArray[I], N); - Region->FreeList.push_back(B); - I += N; + + ScopedLock L(Region->FLLock); + + if (ClassId != SizeClassMap::BatchClassId) { + u32 N = 1; + uptr CurGroup = compactPtrGroup(ShuffleArray[0]); + for (u32 I = 1; I < NumberOfBlocks; I++) { + if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) { + shuffle(ShuffleArray + I - N, N, &Region->RandState); + pushBlocksImpl(C, ClassId, Region, ShuffleArray + I - N, N, + /*SameGroup=*/true); + N = 1; + CurGroup = compactPtrGroup(ShuffleArray[I]); + } else { + ++N; + } + } + + shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState); + pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N, + /*SameGroup=*/true); + } else { + pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks); } - TransferBatch *B = Region->FreeList.front(); - Region->FreeList.pop_front(); - DCHECK(B); - DCHECK_GT(B->getCount(), 0); + + TransferBatchT *B = popBatchImpl(C, ClassId, Region); + DCHECK_NE(B, nullptr); + + // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record + // the requests from `PushBlocks` and `PopBatch` which are external + // interfaces. `populateFreeListAndPopBatch` is the internal interface so we + // should set the values back to avoid incorrectly setting the stats. 
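// Worked example (a sketch, not part of the patch) of the external-only
// stats policy described above: after an internal refill of N blocks that
// also pops a batch of K blocks, PushedBlocks is rolled back by N (the
// statement right below), and the usual freelist identity still holds:
//
//   BytesInFreeList = AllocatedUser - (Popped - Pushed) * BlockSize
//
// The block size, refill count, and batch size are assumed example values.
#include <cstdint>

struct RefillExample {
  std::uint64_t AllocatedUser = 0, Popped = 0, Pushed = 0;
};

constexpr std::uint64_t bytesInFreeList(const RefillExample &R,
                                        std::uint64_t BlockSize) {
  return R.AllocatedUser - (R.Popped - R.Pushed) * BlockSize;
}

// Assumed numbers: 64-byte blocks, refill N = 128 blocks, first batch K = 13.
constexpr RefillExample afterRefill() {
  RefillExample R{};
  R.AllocatedUser += 128 * 64; // newly provisioned user bytes
  R.Pushed += 128;             // internal pushBlocksImpl() of the refill
  R.Popped += 13;              // internal popBatchImpl() for the caller
  R.Pushed -= 128;             // the roll-back: keep counters external-only
  return R;
}

// 128 - 13 = 115 blocks stay on the freelist, i.e. 115 * 64 bytes.
static_assert(bytesInFreeList(afterRefill(), 64) == 115 * 64, "consistent");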
+ Region->FreeListInfo.PushedBlocks -= NumberOfBlocks; const uptr AllocatedUser = Size * NumberOfBlocks; C->getStats().add(StatFree, AllocatedUser); - Region->AllocatedUser += AllocatedUser; + Region->MemMapInfo.AllocatedUser += AllocatedUser; return B; } - void getStats(ScopedString *Str, uptr ClassId, uptr Rss) { - RegionInfo *Region = getRegionInfo(ClassId); - if (Region->MappedUser == 0) + void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region) + REQUIRES(Region->MMLock, Region->FLLock) { + if (Region->MemMapInfo.MappedUser == 0) return; - const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks; - const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId); - Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " - "inuse: %6zu total: %6zu rss: %6zuK releases: %6zu last " - "released: %6zuK region: 0x%zx (0x%zx)\n", - Region->Exhausted ? "F" : " ", ClassId, - getSizeByClassId(ClassId), Region->MappedUser >> 10, - Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks, InUse, - TotalChunks, Rss >> 10, Region->ReleaseInfo.RangesReleased, - Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg, - getRegionBaseByClassId(ClassId)); + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr BytesInFreeList = + Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize; + uptr RegionPushedBytesDelta = 0; + if (BytesInFreeList >= + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + } + const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize; + Str->append( + "%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu " + "inuse: %6zu total: %6zu releases: %6zu last " + "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx (0x%zx)\n", + Region->Exhausted ? 
"E" : " ", ClassId, getSizeByClassId(ClassId), + Region->MemMapInfo.MappedUser >> 10, Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks, + Region->ReleaseInfo.RangesReleased, + Region->ReleaseInfo.LastReleasedBytes >> 10, + RegionPushedBytesDelta >> 10, Region->RegionBeg, + getRegionBaseByClassId(ClassId)); + } + + void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId, + ScopedString *Str) REQUIRES(Region->MMLock) { + const uptr BlockSize = getSizeByClassId(ClassId); + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + + SinglyLinkedList GroupsToRelease; + { + ScopedLock L(Region->FLLock); + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } + + FragmentationRecorder Recorder; + if (!GroupsToRelease.empty()) { + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + + mergeGroupsToReleaseBack(Region, GroupsToRelease); + } + + ScopedLock L(Region->FLLock); + const uptr PageSize = getPageSizeCached(); + const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize; + const uptr InUseBlocks = + Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks; + const uptr AllocatedPagesCount = + roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize; + DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount()); + const uptr InUsePages = + AllocatedPagesCount - Recorder.getReleasedPagesCount(); + const uptr InUseBytes = InUsePages * PageSize; + + uptr Integral; + uptr Fractional; + computePercentage(BlockSize * InUseBlocks, InUsePages * PageSize, &Integral, + &Fractional); + Str->append(" %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total " + "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n", + ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages, + AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional); } NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId, - bool Force = false) { + ReleaseToOS ReleaseType = ReleaseToOS::Normal) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { const uptr BlockSize = getSizeByClassId(ClassId); + uptr BytesInFreeList; + const uptr AllocatedUserEnd = + Region->MemMapInfo.AllocatedUser + Region->RegionBeg; + SinglyLinkedList GroupsToRelease; + + { + ScopedLock L(Region->FLLock); + + BytesInFreeList = Region->MemMapInfo.AllocatedUser - + (Region->FreeListInfo.PoppedBlocks - + Region->FreeListInfo.PushedBlocks) * + BlockSize; + if (UNLIKELY(BytesInFreeList == 0)) + return false; + + // ==================================================================== // + // 1. Check if we have enough free blocks and if it's worth doing a page + // release. + // ==================================================================== // + if (ReleaseType != ReleaseToOS::ForceAll && + !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList, + ReleaseType)) { + return 0; + } + + // ==================================================================== // + // 2. Determine which groups can release the pages. Use a heuristic to + // gather groups that are candidates for doing a release. 
+ // ==================================================================== // + if (ReleaseType == ReleaseToOS::ForceAll) { + GroupsToRelease = Region->FreeListInfo.BlockList; + Region->FreeListInfo.BlockList.clear(); + } else { + GroupsToRelease = + collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId)); + } + if (GroupsToRelease.empty()) + return 0; + } + + // Note that we have extracted the `GroupsToRelease` from region freelist. + // It's safe to let pushBlocks()/popBatches() access the remaining region + // freelist. In the steps 3 and 4, we will temporarily release the FLLock + // and lock it again before step 5. + + // ==================================================================== // + // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`. + // Then we can tell which pages are in-use by querying + // `PageReleaseContext`. + // ==================================================================== // + PageReleaseContext Context = + markFreeBlocks(Region, BlockSize, AllocatedUserEnd, + getCompactPtrBaseByClassId(ClassId), GroupsToRelease); + if (UNLIKELY(!Context.hasBlockMarked())) { + mergeGroupsToReleaseBack(Region, GroupsToRelease); + return 0; + } + + // ==================================================================== // + // 4. Release the unused physical pages back to the OS. + // ==================================================================== // + RegionReleaseRecorder Recorder(&Region->MemMapInfo.MemMap, + Region->RegionBeg, + Context.getReleaseOffset()); + auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; + releaseFreeMemoryToOS(Context, Recorder, SkipRegion); + if (Recorder.getReleasedRangesCount() > 0) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); + Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + } + Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast(); + + // ====================================================================== // + // 5. Merge the `GroupsToRelease` back to the freelist. + // ====================================================================== // + mergeGroupsToReleaseBack(Region, GroupsToRelease); + + return Recorder.getReleasedBytes(); + } + + bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize, + uptr BytesInFreeList, ReleaseToOS ReleaseType) + REQUIRES(Region->MMLock, Region->FLLock) { + DCHECK_GE(Region->FreeListInfo.PoppedBlocks, + Region->FreeListInfo.PushedBlocks); const uptr PageSize = getPageSizeCached(); - DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks); - const uptr BytesInFreeList = - Region->AllocatedUser - - (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize; - if (BytesInFreeList < PageSize) - return 0; // No chance to release anything. - const uptr BytesPushed = (Region->Stats.PushedBlocks - - Region->ReleaseInfo.PushedBlocksAtLastRelease) * - BlockSize; - if (BytesPushed < PageSize) - return 0; // Nothing new to release. + // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value + // so that we won't underestimate the releasable pages. 
For example, the + // following is the region usage, + // + // BytesInFreeListAtLastCheckpoint AllocatedUser + // v v + // |---------------------------------------> + // ^ ^ + // BytesInFreeList ReleaseThreshold + // + // In general, if we have collected enough bytes and the amount of free + // bytes meets the ReleaseThreshold, we will try to do page release. If we + // don't update `BytesInFreeListAtLastCheckpoint` when the current + // `BytesInFreeList` is smaller, we may take longer time to wait for enough + // freed blocks because we miss the bytes between + // (BytesInFreeListAtLastCheckpoint - BytesInFreeList). + if (BytesInFreeList <= + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) { + Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList; + } + + const uptr RegionPushedBytesDelta = + BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint; + if (RegionPushedBytesDelta < PageSize) + return false; // Releasing smaller blocks is expensive, so we want to make sure that a // significant amount of bytes are free, and that there has been a good // amount of batches pushed to the freelist before attempting to release. - if (BlockSize < PageSize / 16U) { - if (!Force && BytesPushed < Region->AllocatedUser / 16U) - return 0; - // We want 8x% to 9x% free bytes (the larger the block, the lower the %). - if ((BytesInFreeList * 100U) / Region->AllocatedUser < - (100U - 1U - BlockSize / 16U)) - return 0; - } + if (isSmallBlock(BlockSize) && ReleaseType == ReleaseToOS::Normal) + if (RegionPushedBytesDelta < Region->TryReleaseThreshold) + return false; - if (!Force) { + if (ReleaseType == ReleaseToOS::Normal) { const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs); if (IntervalMs < 0) - return 0; - if (Region->ReleaseInfo.LastReleaseAtNs + - static_cast(IntervalMs) * 1000000 > - getMonotonicTime()) { - return 0; // Memory was returned recently. + return false; + + // The constant 8 here is selected from profiling some apps and the number + // of unreleased pages in the large size classes is around 16 pages or + // more. Choose half of it as a heuristic and which also avoids page + // release every time for every pushBlocks() attempt by large blocks. + const bool ByPassReleaseInterval = + isLargeBlock(BlockSize) && RegionPushedBytesDelta > 8 * PageSize; + if (!ByPassReleaseInterval) { + if (Region->ReleaseInfo.LastReleaseAtNs + + static_cast(IntervalMs) * 1000000 > + getMonotonicTimeFast()) { + // Memory was returned recently. + return false; + } + } + } // if (ReleaseType == ReleaseToOS::Normal) + + return true; + } + + SinglyLinkedList + collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase) + REQUIRES(Region->MMLock, Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); + const uptr PageSize = getPageSizeCached(); + SinglyLinkedList GroupsToRelease; + + // We are examining each group and will take the minimum distance to the + // release threshold as the next Region::TryReleaseThreshold(). Note that if + // the size of free blocks has reached the release threshold, the distance + // to the next release will be PageSize * SmallerBlockReleasePageDelta. See + // the comment on `SmallerBlockReleasePageDelta` for more details. + uptr MinDistToThreshold = GroupSize; + + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; + BG != nullptr;) { + // Group boundary is always GroupSize-aligned from CompactPtr base. 
The + // layout of memory groups is like, + // + // (CompactPtrBase) + // #1 CompactPtrGroupBase #2 CompactPtrGroupBase ... + // | | | + // v v v + // +-----------------------+-----------------------+ + // \ / \ / + // --- GroupSize --- --- GroupSize --- + // + // After decompacting the CompactPtrGroupBase, we expect the alignment + // property is held as well. + const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase); + DCHECK_LE(Region->RegionBeg, BatchGroupBase); + DCHECK_GE(AllocatedUserEnd, BatchGroupBase); + DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U); + // TransferBatches are pushed in front of BG.Batches. The first one may + // not have all caches used. + const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch + + BG->Batches.front()->getCount(); + const uptr BytesInBG = NumBlocks * BlockSize; + + if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) { + BG->BytesInBGAtLastCheckpoint = BytesInBG; + Prev = BG; + BG = BG->Next; + continue; } + + const uptr PushedBytesDelta = BG->BytesInBGAtLastCheckpoint - BytesInBG; + + // Given the randomness property, we try to release the pages only if the + // bytes used by free blocks exceed certain proportion of group size. Note + // that this heuristic only applies when all the spaces in a BatchGroup + // are allocated. + if (isSmallBlock(BlockSize)) { + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? GroupSize + : AllocatedUserEnd - BatchGroupBase; + const uptr ReleaseThreshold = + (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; + const bool HighDensity = BytesInBG >= ReleaseThreshold; + const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); + // If all blocks in the group are released, we will do range marking + // which is fast. Otherwise, we will wait until we have accumulated + // a certain amount of free memory. + const bool ReachReleaseDelta = + MayHaveReleasedAll + ? true + : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; + + if (!HighDensity) { + DCHECK_LE(BytesInBG, ReleaseThreshold); + // The following is the usage of a memroy group, + // + // BytesInBG ReleaseThreshold + // / \ v + // +---+---------------------------+-----+ + // | | | | | + // +---+---------------------------+-----+ + // \ / ^ + // PushedBytesDelta GroupEnd + MinDistToThreshold = + Min(MinDistToThreshold, + ReleaseThreshold - BytesInBG + PushedBytesDelta); + } else { + // If it reaches high density at this round, the next time we will try + // to release is based on SmallerBlockReleasePageDelta + MinDistToThreshold = + Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); + } + + if (!HighDensity || !ReachReleaseDelta) { + Prev = BG; + BG = BG->Next; + continue; + } + } + + // If `BG` is the first BatchGroupT in the list, we only need to advance + // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed + // for `Prev`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // nil +--+ +--+ + // |X | -> | | -> ... + // +--+ +--+ + // + // Otherwise, `Prev` will be used to extract the `Cur` from the + // `FreeListInfo.BlockList`. + // + // (BG) (BG->Next) + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | | -> |X | -> | | -> ... + // +--+ +--+ +--+ + // + // After FreeListInfo.BlockList::extract(), + // + // Prev Cur BG + // | | | + // v v v + // +--+ +--+ +--+ + // | |-+ |X | +->| | -> ... 
+ // +--+ | +--+ | +--+ + // +--------+ + // + // Note that we need to advance before pushing this BatchGroup to + // GroupsToRelease because it's a destructive operation. + + BatchGroupT *Cur = BG; + BG = BG->Next; + + // Ideally, we may want to update this only after successful release. + // However, for smaller blocks, each block marking is a costly operation. + // Therefore, we update it earlier. + // TODO: Consider updating this after releasing pages if `ReleaseRecorder` + // can tell the released bytes in each group. + Cur->BytesInBGAtLastCheckpoint = BytesInBG; + + if (Prev != nullptr) + Region->FreeListInfo.BlockList.extract(Prev, Cur); + else + Region->FreeListInfo.BlockList.pop_front(); + GroupsToRelease.push_back(Cur); } - ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data); - const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId); + // Only small blocks have the adaptive `TryReleaseThreshold`. + if (isSmallBlock(BlockSize)) { + // If the MinDistToThreshold is not updated, that means each memory group + // may have only pushed less than a page size. In that case, just set it + // back to normal. + if (MinDistToThreshold == GroupSize) + MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; + Region->TryReleaseThreshold = MinDistToThreshold; + } + + return GroupsToRelease; + } + + PageReleaseContext + markFreeBlocks(RegionInfo *Region, const uptr BlockSize, + const uptr AllocatedUserEnd, const uptr CompactPtrBase, + SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + const uptr GroupSize = (1UL << GroupSizeLog); auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) { return decompactPtrInternal(CompactPtrBase, CompactPtr); }; - auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; }; - releaseFreeMemoryToOS(Region->FreeList, Region->AllocatedUser, 1U, - BlockSize, &Recorder, DecompactPtr, SkipRegion); - if (Recorder.getReleasedRangesCount() > 0) { - Region->ReleaseInfo.PushedBlocksAtLastRelease = - Region->Stats.PushedBlocks; - Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount(); - Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes(); + const uptr ReleaseBase = decompactGroupBase( + CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase); + const uptr LastGroupEnd = + Min(decompactGroupBase(CompactPtrBase, + GroupsToRelease.back()->CompactPtrGroupBase) + + GroupSize, + AllocatedUserEnd); + // The last block may straddle the group boundary. Rounding up to BlockSize + // to get the exact range. + const uptr ReleaseEnd = + roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) + + Region->RegionBeg; + const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase; + const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg; + + PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U, + ReleaseRangeSize, ReleaseOffset); + // We may not be able to do the page release in a rare case that we may + // fail on PageMap allocation. + if (UNLIKELY(!Context.ensurePageMapAllocated())) + return Context; + + for (BatchGroupT &BG : GroupsToRelease) { + const uptr BatchGroupBase = + decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase); + const uptr BatchGroupEnd = BatchGroupBase + GroupSize; + const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd + ? 
GroupSize + : AllocatedUserEnd - BatchGroupBase; + const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize; + const bool MayContainLastBlockInRegion = + BatchGroupUsedEnd == AllocatedUserEnd; + const bool BlockAlignedWithUsedEnd = + (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0; + + uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize; + if (!BlockAlignedWithUsedEnd) + ++MaxContainedBlocks; + + const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch + + BG.Batches.front()->getCount(); + + if (NumBlocks == MaxContainedBlocks) { + for (const auto &It : BG.Batches) { + if (&It != BG.Batches.front()) + DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch); + for (u16 I = 0; I < It.getCount(); ++I) + DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase); + } + + Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd, + Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser); + } else { + DCHECK_LT(NumBlocks, MaxContainedBlocks); + // Note that we don't always visit blocks in each BatchGroup so that we + // may miss the chance of releasing certain pages that cross + // BatchGroups. + Context.markFreeBlocksInRegion( + BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0, + Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion); + } } - Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime(); - return Recorder.getReleasedBytes(); + + DCHECK(Context.hasBlockMarked()); + + return Context; } + + void mergeGroupsToReleaseBack(RegionInfo *Region, + SinglyLinkedList &GroupsToRelease) + REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + ScopedLock L(Region->FLLock); + + // After merging two freelists, we may have redundant `BatchGroup`s that + // need to be recycled. The number of unused `BatchGroup`s is expected to be + // small. Pick a constant which is inferred from real programs. + constexpr uptr MaxUnusedSize = 8; + CompactPtrT Blocks[MaxUnusedSize]; + u32 Idx = 0; + RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId); + // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s + // when we are manipulating the freelist of `BatchClassRegion`. Instead, we + // should just push it back to the freelist when we merge two `BatchGroup`s. + // This logic hasn't been implemented because we haven't supported releasing + // pages in `BatchClassRegion`. + DCHECK_NE(BatchClassRegion, Region); + + // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note + // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are + // sorted. + for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(), + *Prev = nullptr; + ;) { + if (BG == nullptr || GroupsToRelease.empty()) { + if (!GroupsToRelease.empty()) + Region->FreeListInfo.BlockList.append_back(&GroupsToRelease); + break; + } + + DCHECK(!BG->Batches.empty()); + + if (BG->CompactPtrGroupBase < + GroupsToRelease.front()->CompactPtrGroupBase) { + Prev = BG; + BG = BG->Next; + continue; + } + + BatchGroupT *Cur = GroupsToRelease.front(); + TransferBatchT *UnusedTransferBatch = nullptr; + GroupsToRelease.pop_front(); + + if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) { + BG->PushedBlocks += Cur->PushedBlocks; + // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while + // collecting the `GroupsToRelease`. 
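+ // Carrying the collected checkpoint over means the merged group generally + // will not be reconsidered for release until enough new bytes have been + // pushed to it. E.g. (illustrative numbers only): a group collected with + // 64 KiB of free blocks restarts its pushed-bytes accounting from that + // 64 KiB once it is merged back here.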
+ BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint; + const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch; + + // Note that the first TransferBatches in both `Batches` may not be + // full and only the first TransferBatch can have non-full blocks. Thus + // we have to merge them before appending one to another. + if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) { + BG->Batches.append_back(&Cur->Batches); + } else { + TransferBatchT *NonFullBatch = Cur->Batches.front(); + Cur->Batches.pop_front(); + const u16 NonFullBatchCount = NonFullBatch->getCount(); + // The remaining Batches in `Cur` are full. + BG->Batches.append_back(&Cur->Batches); + + if (BG->Batches.front()->getCount() == MaxCachedPerBatch) { + // Only 1 non-full TransferBatch, push it to the front. + BG->Batches.push_front(NonFullBatch); + } else { + const u16 NumBlocksToMove = static_cast( + Min(static_cast(MaxCachedPerBatch - + BG->Batches.front()->getCount()), + NonFullBatchCount)); + BG->Batches.front()->appendFromTransferBatch(NonFullBatch, + NumBlocksToMove); + if (NonFullBatch->isEmpty()) + UnusedTransferBatch = NonFullBatch; + else + BG->Batches.push_front(NonFullBatch); + } + } + + const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U; + if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); + if (conditionVariableEnabled()) + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); + Idx = 0; + } + Blocks[Idx++] = + compactPtr(SizeClassMap::BatchClassId, reinterpret_cast(Cur)); + if (UnusedTransferBatch) { + Blocks[Idx++] = + compactPtr(SizeClassMap::BatchClassId, + reinterpret_cast(UnusedTransferBatch)); + } + Prev = BG; + BG = BG->Next; + continue; + } + + // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase + // larger than the first element in `GroupsToRelease`. We need to insert + // `GroupsToRelease::front()` (which is `Cur` below) before `BG`. + // + // 1. If `Prev` is nullptr, we simply push `Cur` to the front of + // FreeListInfo.BlockList. + // 2. Otherwise, use `insert()` which inserts an element next to `Prev`. + // + // Afterwards, we don't need to advance `BG` because the order between + // `BG` and the new `GroupsToRelease::front()` hasn't been checked. + if (Prev == nullptr) + Region->FreeListInfo.BlockList.push_front(Cur); + else + Region->FreeListInfo.BlockList.insert(Prev, Cur); + DCHECK_EQ(Cur->Next, BG); + Prev = Cur; + } + + if (Idx != 0) { + ScopedLock L(BatchClassRegion->FLLock); + pushBatchClassBlocks(BatchClassRegion, Blocks, Idx); + if (conditionVariableEnabled()) + BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock); + } + + if (SCUDO_DEBUG) { + BatchGroupT *Prev = Region->FreeListInfo.BlockList.front(); + for (BatchGroupT *Cur = Prev->Next; Cur != nullptr; + Prev = Cur, Cur = Cur->Next) { + CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase); + } + } + + if (conditionVariableEnabled()) + Region->FLLockCV.notifyAll(Region->FLLock); + } + + // TODO: `PrimaryBase` can be obtained from ReservedMemory. This needs to be + // deprecated. + uptr PrimaryBase = 0; + ReservedMemoryT ReservedMemory = {}; + // The minimum size of pushed blocks that we will try to release the pages in + // that size class. 
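+ // The value acts as a multiplier on the page size: roughly PageSize * + // SmallerBlockReleasePageDelta bytes of newly pushed blocks are typically + // needed before a small-block group is considered for release again (see + // ReachReleaseDelta in collectGroupsToRelease()).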
+ uptr SmallerBlockReleasePageDelta = 0; + atomic_s32 ReleaseToOsIntervalMs = {}; + alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses]; }; } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/quarantine.h b/third_party/llvm-scudo-standalone/quarantine.h index 2d231c3..b5f8db0 100644 --- a/third_party/llvm-scudo-standalone/quarantine.h +++ b/third_party/llvm-scudo-standalone/quarantine.h @@ -12,6 +12,7 @@ #include "list.h" #include "mutex.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -172,7 +173,7 @@ template class GlobalQuarantine { typedef QuarantineCache CacheT; using ThisT = GlobalQuarantine; - void init(uptr Size, uptr CacheSize) { + void init(uptr Size, uptr CacheSize) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); DCHECK_EQ(atomic_load_relaxed(&MaxSize), 0U); DCHECK_EQ(atomic_load_relaxed(&MinSize), 0U); @@ -191,22 +192,31 @@ template class GlobalQuarantine { uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); } uptr getCacheSize() const { return atomic_load_relaxed(&MaxCacheSize); } + // This is supposed to be used in test only. + bool isEmpty() { + ScopedLock L(CacheMutex); + return Cache.getSize() == 0U; + } + void put(CacheT *C, Callback Cb, Node *Ptr, uptr Size) { C->enqueue(Cb, Ptr, Size); if (C->getSize() > getCacheSize()) drain(C, Cb); } - void NOINLINE drain(CacheT *C, Callback Cb) { + void NOINLINE drain(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { + bool needRecycle = false; { ScopedLock L(CacheMutex); Cache.transfer(C); + needRecycle = Cache.getSize() > getMaxSize(); } - if (Cache.getSize() > getMaxSize() && RecycleMutex.tryLock()) + + if (needRecycle && RecycleMutex.tryLock()) recycle(atomic_load_relaxed(&MinSize), Cb); } - void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) { + void NOINLINE drainAndRecycle(CacheT *C, Callback Cb) EXCLUDES(CacheMutex) { { ScopedLock L(CacheMutex); Cache.transfer(C); @@ -215,20 +225,21 @@ template class GlobalQuarantine { recycle(0, Cb); } - void getStats(ScopedString *Str) const { + void getStats(ScopedString *Str) EXCLUDES(CacheMutex) { + ScopedLock L(CacheMutex); // It assumes that the world is stopped, just as the allocator's printStats. Cache.getStats(Str); Str->append("Quarantine limits: global: %zuK; thread local: %zuK\n", getMaxSize() >> 10, getCacheSize() >> 10); } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { // RecycleMutex must be locked 1st since we grab CacheMutex within recycle. RecycleMutex.lock(); CacheMutex.lock(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { CacheMutex.unlock(); RecycleMutex.unlock(); } @@ -236,13 +247,14 @@ template class GlobalQuarantine { private: // Read-only data. 
alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex CacheMutex; - CacheT Cache; + CacheT Cache GUARDED_BY(CacheMutex); alignas(SCUDO_CACHE_LINE_SIZE) HybridMutex RecycleMutex; atomic_uptr MinSize = {}; atomic_uptr MaxSize = {}; alignas(SCUDO_CACHE_LINE_SIZE) atomic_uptr MaxCacheSize = {}; - void NOINLINE recycle(uptr MinSize, Callback Cb) { + void NOINLINE recycle(uptr MinSize, Callback Cb) RELEASE(RecycleMutex) + EXCLUDES(CacheMutex) { CacheT Tmp; Tmp.init(); { diff --git a/third_party/llvm-scudo-standalone/release.cpp b/third_party/llvm-scudo-standalone/release.cpp index 5d7c6c5..875a2b0 100644 --- a/third_party/llvm-scudo-standalone/release.cpp +++ b/third_party/llvm-scudo-standalone/release.cpp @@ -10,7 +10,8 @@ namespace scudo { -HybridMutex PackedCounterArray::Mutex = {}; -uptr PackedCounterArray::StaticBuffer[PackedCounterArray::StaticBufferCount]; +BufferPool + RegionPageMap::Buffers; } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/release.h b/third_party/llvm-scudo-standalone/release.h index 293a8bc..b6f76a4 100644 --- a/third_party/llvm-scudo-standalone/release.h +++ b/third_party/llvm-scudo-standalone/release.h @@ -11,14 +11,46 @@ #include "common.h" #include "list.h" +#include "mem_map.h" #include "mutex.h" +#include "thread_annotations.h" namespace scudo { +template class RegionReleaseRecorder { +public: + RegionReleaseRecorder(MemMapT *RegionMemMap, uptr Base, uptr Offset = 0) + : RegionMemMap(RegionMemMap), Base(Base), Offset(Offset) {} + + uptr getReleasedRangesCount() const { return ReleasedRangesCount; } + + uptr getReleasedBytes() const { return ReleasedBytes; } + + uptr getBase() const { return Base; } + + // Releases [From, To) range of pages back to OS. Note that `From` and `To` + // are offseted from `Base` + Offset. + void releasePageRangeToOS(uptr From, uptr To) { + const uptr Size = To - From; + RegionMemMap->releasePagesToOS(getBase() + Offset + From, Size); + ReleasedRangesCount++; + ReleasedBytes += Size; + } + +private: + uptr ReleasedRangesCount = 0; + uptr ReleasedBytes = 0; + MemMapT *RegionMemMap = nullptr; + uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. + uptr Offset = 0; +}; + class ReleaseRecorder { public: - ReleaseRecorder(uptr Base, MapPlatformData *Data = nullptr) - : Base(Base), Data(Data) {} + ReleaseRecorder(uptr Base, uptr Offset = 0, MapPlatformData *Data = nullptr) + : Base(Base), Offset(Offset), Data(Data) {} uptr getReleasedRangesCount() const { return ReleasedRangesCount; } @@ -29,7 +61,7 @@ class ReleaseRecorder { // Releases [From, To) range of pages back to OS. void releasePageRangeToOS(uptr From, uptr To) { const uptr Size = To - From; - releasePagesToOS(Base, From, Size, Data); + releasePagesToOS(Base, From + Offset, Size, Data); ReleasedRangesCount++; ReleasedBytes += Size; } @@ -37,31 +69,171 @@ class ReleaseRecorder { private: uptr ReleasedRangesCount = 0; uptr ReleasedBytes = 0; + // The starting address to release. Note that we may want to combine (Base + + // Offset) as a new Base. However, the Base is retrieved from + // `MapPlatformData` on Fuchsia, which means the offset won't be aware. + // Therefore, store them separately to make it work on all the platforms. uptr Base = 0; + // The release offset from Base. This is used when we know a given range after + // Base will not be released. + uptr Offset = 0; MapPlatformData *Data = nullptr; }; -// A packed array of Counters. 
Each counter occupies 2^N bits, enough to store -// counter's MaxValue. Ctor will try to use a static buffer first, and if that -// fails (the buffer is too small or already locked), will allocate the +class FragmentationRecorder { +public: + FragmentationRecorder() = default; + + uptr getReleasedPagesCount() const { return ReleasedPagesCount; } + + void releasePageRangeToOS(uptr From, uptr To) { + DCHECK_EQ((To - From) % getPageSizeCached(), 0U); + ReleasedPagesCount += (To - From) / getPageSizeCached(); + } + +private: + uptr ReleasedPagesCount = 0; +}; + +// A buffer pool which holds a fixed number of static buffers of `uptr` elements +// for fast buffer allocation. If the request size is greater than +// `StaticBufferNumElements` or if all the static buffers are in use, it'll +// delegate the allocation to map(). +template +class BufferPool { +public: + // Preserve 1 bit in the `Mask` so that we don't need to do zero-check while + // extracting the least significant bit from the `Mask`. + static_assert(StaticBufferCount < SCUDO_WORDSIZE, ""); + static_assert(isAligned(StaticBufferNumElements * sizeof(uptr), + SCUDO_CACHE_LINE_SIZE), + ""); + + struct Buffer { + // Pointer to the buffer's memory, or nullptr if no buffer was allocated. + uptr *Data = nullptr; + + // The index of the underlying static buffer, or StaticBufferCount if this + // buffer was dynamically allocated. This value is initially set to a poison + // value to aid debugging. + uptr BufferIndex = ~static_cast(0); + + // Only valid if BufferIndex == StaticBufferCount. + MemMapT MemMap = {}; + }; + + // Return a zero-initialized buffer which can contain at least the given + // number of elements, or nullptr on failure. + Buffer getBuffer(const uptr NumElements) { + if (UNLIKELY(NumElements > StaticBufferNumElements)) + return getDynamicBuffer(NumElements); + + uptr index; + { + // TODO: In general, we expect this operation should be fast so the + // waiting thread won't be put into sleep. The HybridMutex does implement + // the busy-waiting but we may want to review the performance and see if + // we need an explict spin lock here. + ScopedLock L(Mutex); + index = getLeastSignificantSetBitIndex(Mask); + if (index < StaticBufferCount) + Mask ^= static_cast(1) << index; + } + + if (index >= StaticBufferCount) + return getDynamicBuffer(NumElements); + + Buffer Buf; + Buf.Data = &RawBuffer[index * StaticBufferNumElements]; + Buf.BufferIndex = index; + memset(Buf.Data, 0, StaticBufferNumElements * sizeof(uptr)); + return Buf; + } + + void releaseBuffer(Buffer Buf) { + DCHECK_NE(Buf.Data, nullptr); + DCHECK_LE(Buf.BufferIndex, StaticBufferCount); + if (Buf.BufferIndex != StaticBufferCount) { + ScopedLock L(Mutex); + DCHECK_EQ((Mask & (static_cast(1) << Buf.BufferIndex)), 0U); + Mask |= static_cast(1) << Buf.BufferIndex; + } else { + Buf.MemMap.unmap(Buf.MemMap.getBase(), Buf.MemMap.getCapacity()); + } + } + + bool isStaticBufferTestOnly(const Buffer &Buf) { + DCHECK_NE(Buf.Data, nullptr); + DCHECK_LE(Buf.BufferIndex, StaticBufferCount); + return Buf.BufferIndex != StaticBufferCount; + } + +private: + Buffer getDynamicBuffer(const uptr NumElements) { + // When using a heap-based buffer, precommit the pages backing the + // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization + // where page fault exceptions are skipped as the allocated memory + // is accessed. So far, this is only enabled on Fuchsia. It hasn't proven a + // performance benefit on other platforms. 
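+ // E.g. (illustrative, 64-bit target with 4 KiB pages): a request for 600 + // counters maps roundUp(600 * sizeof(uptr), 4096) == 8192 bytes, i.e. two + // pages, which releaseBuffer() later hands back to unmap().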
+ const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0); + const uptr MappedSize = + roundUp(NumElements * sizeof(uptr), getPageSizeCached()); + Buffer Buf; + if (Buf.MemMap.map(/*Addr=*/0, MappedSize, "scudo:counters", MmapFlags)) { + Buf.Data = reinterpret_cast(Buf.MemMap.getBase()); + Buf.BufferIndex = StaticBufferCount; + } + return Buf; + } + + HybridMutex Mutex; + // '1' means that buffer index is not used. '0' means the buffer is in use. + uptr Mask GUARDED_BY(Mutex) = ~static_cast(0); + uptr RawBuffer[StaticBufferCount * StaticBufferNumElements] GUARDED_BY(Mutex); +}; + +// A Region page map is used to record the usage of pages in the regions. It +// implements a packed array of Counters. Each counter occupies 2^N bits, enough +// to store counter's MaxValue. Ctor will try to use a static buffer first, and +// if that fails (the buffer is too small or already locked), will allocate the // required Buffer via map(). The caller is expected to check whether the // initialization was successful by checking isAllocated() result. For // performance sake, none of the accessors check the validity of the arguments, // It is assumed that Index is always in [0, N) range and the value is not // incremented past MaxValue. -class PackedCounterArray { +class RegionPageMap { public: - PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion, - uptr MaxValue) - : Regions(NumberOfRegions), NumCounters(CountersPerRegion) { - DCHECK_GT(Regions, 0); - DCHECK_GT(NumCounters, 0); + RegionPageMap() + : Regions(0), NumCounters(0), CounterSizeBitsLog(0), CounterMask(0), + PackingRatioLog(0), BitOffsetMask(0), SizePerRegion(0), + BufferNumElements(0) {} + RegionPageMap(uptr NumberOfRegions, uptr CountersPerRegion, uptr MaxValue) { + reset(NumberOfRegions, CountersPerRegion, MaxValue); + } + ~RegionPageMap() { + if (!isAllocated()) + return; + Buffers.releaseBuffer(Buffer); + Buffer = {}; + } + + // Lock of `StaticBuffer` is acquired conditionally and there's no easy way to + // specify the thread-safety attribute properly in current code structure. + // Besides, it's the only place we may want to check thread safety. Therefore, + // it's fine to bypass the thread-safety analysis now. + void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) { + DCHECK_GT(NumberOfRegion, 0); + DCHECK_GT(CountersPerRegion, 0); DCHECK_GT(MaxValue, 0); - constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL; + + Regions = NumberOfRegion; + NumCounters = CountersPerRegion; + + constexpr uptr MaxCounterBits = sizeof(*Buffer.Data) * 8UL; // Rounding counter storage size up to the power of two allows for using // bit shifts calculating particular counter's Index and offset. 
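// E.g. (illustrative): MaxValue == 5 needs 3 bits, which rounds up to a // 4-bit counter, so CounterSizeBitsLog == 2, CounterMask == 0xf and, on a // 64-bit target, 16 counters pack into each uptr word.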
const uptr CounterSizeBits = - roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); + roundUpPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1); DCHECK_LE(CounterSizeBits, MaxCounterBits); CounterSizeBitsLog = getLog2(CounterSizeBits); CounterMask = ~(static_cast(0)) >> (MaxCounterBits - CounterSizeBits); @@ -72,30 +244,13 @@ class PackedCounterArray { BitOffsetMask = PackingRatio - 1; SizePerRegion = - roundUpTo(NumCounters, static_cast(1U) << PackingRatioLog) >> + roundUp(NumCounters, static_cast(1U) << PackingRatioLog) >> PackingRatioLog; - BufferSize = SizePerRegion * sizeof(*Buffer) * Regions; - if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) && - Mutex.tryLock()) { - Buffer = &StaticBuffer[0]; - memset(Buffer, 0, BufferSize); - } else { - Buffer = reinterpret_cast( - map(nullptr, roundUpTo(BufferSize, getPageSizeCached()), - "scudo:counters", MAP_ALLOWNOMEM)); - } - } - ~PackedCounterArray() { - if (!isAllocated()) - return; - if (Buffer == &StaticBuffer[0]) - Mutex.unlock(); - else - unmap(reinterpret_cast(Buffer), - roundUpTo(BufferSize, getPageSizeCached())); + BufferNumElements = SizePerRegion * Regions; + Buffer = Buffers.getBuffer(BufferNumElements); } - bool isAllocated() const { return !!Buffer; } + bool isAllocated() const { return Buffer.Data != nullptr; } uptr getCount() const { return NumCounters; } @@ -104,7 +259,8 @@ class PackedCounterArray { DCHECK_LT(I, NumCounters); const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; - return (Buffer[Region * SizePerRegion + Index] >> BitOffset) & CounterMask; + return (Buffer.Data[Region * SizePerRegion + Index] >> BitOffset) & + CounterMask; } void inc(uptr Region, uptr I) const { @@ -112,8 +268,20 @@ class PackedCounterArray { const uptr Index = I >> PackingRatioLog; const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; DCHECK_LT(BitOffset, SCUDO_WORDSIZE); - Buffer[Region * SizePerRegion + Index] += static_cast(1U) - << BitOffset; + DCHECK_EQ(isAllCounted(Region, I), false); + Buffer.Data[Region * SizePerRegion + Index] += static_cast(1U) + << BitOffset; + } + + void incN(uptr Region, uptr I, uptr N) const { + DCHECK_GT(N, 0U); + DCHECK_LE(N, CounterMask); + DCHECK_LE(get(Region, I), CounterMask - N); + const uptr Index = I >> PackingRatioLog; + const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; + DCHECK_LT(BitOffset, SCUDO_WORDSIZE); + DCHECK_EQ(isAllCounted(Region, I), false); + Buffer.Data[Region * SizePerRegion + Index] += N << BitOffset; } void incRange(uptr Region, uptr From, uptr To) const { @@ -123,33 +291,67 @@ class PackedCounterArray { inc(Region, I); } - uptr getBufferSize() const { return BufferSize; } + // Set the counter to the max value. Note that the max number of blocks in a + // page may vary. To provide an easier way to tell if all the blocks are + // counted for different pages, set to the same max value to denote the + // all-counted status. 
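+ // E.g. (illustrative) with 4-bit counters, a page that can hold 9 blocks + // and a page that can hold 12 blocks are both stored as 0xf once fully + // counted, so isAllCounted() reduces to a single compare against + // CounterMask.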
+ void setAsAllCounted(uptr Region, uptr I) const { + DCHECK_LE(get(Region, I), CounterMask); + const uptr Index = I >> PackingRatioLog; + const uptr BitOffset = (I & BitOffsetMask) << CounterSizeBitsLog; + DCHECK_LT(BitOffset, SCUDO_WORDSIZE); + Buffer.Data[Region * SizePerRegion + Index] |= CounterMask << BitOffset; + } + void setAsAllCountedRange(uptr Region, uptr From, uptr To) const { + DCHECK_LE(From, To); + const uptr Top = Min(To + 1, NumCounters); + for (uptr I = From; I < Top; I++) + setAsAllCounted(Region, I); + } + + bool updateAsAllCountedIf(uptr Region, uptr I, uptr MaxCount) { + const uptr Count = get(Region, I); + if (Count == CounterMask) + return true; + if (Count == MaxCount) { + setAsAllCounted(Region, I); + return true; + } + return false; + } + bool isAllCounted(uptr Region, uptr I) const { + return get(Region, I) == CounterMask; + } - static const uptr StaticBufferCount = 2048U; + uptr getBufferNumElements() const { return BufferNumElements; } private: - const uptr Regions; - const uptr NumCounters; + // We may consider making this configurable if there are cases which may + // benefit from this. + static const uptr StaticBufferCount = 2U; + static const uptr StaticBufferNumElements = 512U; + using BufferPoolT = BufferPool; + static BufferPoolT Buffers; + + uptr Regions; + uptr NumCounters; uptr CounterSizeBitsLog; uptr CounterMask; uptr PackingRatioLog; uptr BitOffsetMask; uptr SizePerRegion; - uptr BufferSize; - uptr *Buffer; - - static HybridMutex Mutex; - static uptr StaticBuffer[StaticBufferCount]; + uptr BufferNumElements; + BufferPoolT::Buffer Buffer; }; template class FreePagesRangeTracker { public: - explicit FreePagesRangeTracker(ReleaseRecorderT *Recorder) + explicit FreePagesRangeTracker(ReleaseRecorderT &Recorder) : Recorder(Recorder), PageSizeLog(getLog2(getPageSizeCached())) {} - void processNextPage(bool Freed) { - if (Freed) { + void processNextPage(bool Released) { + if (Released) { if (!InRange) { CurrentRangeStatePage = CurrentPage; InRange = true; @@ -170,113 +372,271 @@ template class FreePagesRangeTracker { private: void closeOpenedRange() { if (InRange) { - Recorder->releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog), - (CurrentPage << PageSizeLog)); + Recorder.releasePageRangeToOS((CurrentRangeStatePage << PageSizeLog), + (CurrentPage << PageSizeLog)); InRange = false; } } - ReleaseRecorderT *const Recorder; + ReleaseRecorderT &Recorder; const uptr PageSizeLog; bool InRange = false; uptr CurrentPage = 0; uptr CurrentRangeStatePage = 0; }; -template -NOINLINE void -releaseFreeMemoryToOS(const IntrusiveList &FreeList, - uptr RegionSize, uptr NumberOfRegions, uptr BlockSize, - ReleaseRecorderT *Recorder, DecompactPtrT DecompactPtr, - SkipRegionT SkipRegion) { - const uptr PageSize = getPageSizeCached(); - - // Figure out the number of chunks per page and whether we can take a fast - // path (the number of chunks per page is the same for all pages). - uptr FullPagesBlockCountMax; - bool SameBlockCountPerPage; - if (BlockSize <= PageSize) { - if (PageSize % BlockSize == 0) { - // Same number of chunks per page, no cross overs. - FullPagesBlockCountMax = PageSize / BlockSize; - SameBlockCountPerPage = true; - } else if (BlockSize % (PageSize % BlockSize) == 0) { - // Some chunks are crossing page boundaries, which means that the page - // contains one or two partial chunks, but all pages contain the same - // number of chunks. 
- FullPagesBlockCountMax = PageSize / BlockSize + 1; - SameBlockCountPerPage = true; +struct PageReleaseContext { + PageReleaseContext(uptr BlockSize, uptr NumberOfRegions, uptr ReleaseSize, + uptr ReleaseOffset = 0) + : BlockSize(BlockSize), NumberOfRegions(NumberOfRegions) { + PageSize = getPageSizeCached(); + if (BlockSize <= PageSize) { + if (PageSize % BlockSize == 0) { + // Same number of chunks per page, no cross overs. + FullPagesBlockCountMax = PageSize / BlockSize; + SameBlockCountPerPage = true; + } else if (BlockSize % (PageSize % BlockSize) == 0) { + // Some chunks are crossing page boundaries, which means that the page + // contains one or two partial chunks, but all pages contain the same + // number of chunks. + FullPagesBlockCountMax = PageSize / BlockSize + 1; + SameBlockCountPerPage = true; + } else { + // Some chunks are crossing page boundaries, which means that the page + // contains one or two partial chunks. + FullPagesBlockCountMax = PageSize / BlockSize + 2; + SameBlockCountPerPage = false; + } } else { - // Some chunks are crossing page boundaries, which means that the page - // contains one or two partial chunks. - FullPagesBlockCountMax = PageSize / BlockSize + 2; - SameBlockCountPerPage = false; + if (BlockSize % PageSize == 0) { + // One chunk covers multiple pages, no cross overs. + FullPagesBlockCountMax = 1; + SameBlockCountPerPage = true; + } else { + // One chunk covers multiple pages, Some chunks are crossing page + // boundaries. Some pages contain one chunk, some contain two. + FullPagesBlockCountMax = 2; + SameBlockCountPerPage = false; + } } - } else { - if (BlockSize % PageSize == 0) { - // One chunk covers multiple pages, no cross overs. - FullPagesBlockCountMax = 1; - SameBlockCountPerPage = true; + + // TODO: For multiple regions, it's more complicated to support partial + // region marking (which includes the complexity of how to handle the last + // block in a region). We may consider this after markFreeBlocks() accepts + // only free blocks from the same region. + if (NumberOfRegions != 1) + DCHECK_EQ(ReleaseOffset, 0U); + + PagesCount = roundUp(ReleaseSize, PageSize) / PageSize; + PageSizeLog = getLog2(PageSize); + ReleasePageOffset = ReleaseOffset >> PageSizeLog; + } + + // PageMap is lazily allocated when markFreeBlocks() is invoked. + bool hasBlockMarked() const { + return PageMap.isAllocated(); + } + + bool ensurePageMapAllocated() { + if (PageMap.isAllocated()) + return true; + PageMap.reset(NumberOfRegions, PagesCount, FullPagesBlockCountMax); + // TODO: Log some message when we fail on PageMap allocation. + return PageMap.isAllocated(); + } + + // Mark all the blocks in the given range [From, to). Instead of visiting all + // the blocks, we will just mark the page as all counted. Note the `From` and + // `To` has to be page aligned but with one exception, if `To` is equal to the + // RegionSize, it's not necessary to be aligned with page size. + bool markRangeAsAllCounted(uptr From, uptr To, uptr Base, + const uptr RegionIndex, const uptr RegionSize) { + DCHECK_LT(From, To); + DCHECK_LE(To, Base + RegionSize); + DCHECK_EQ(From % PageSize, 0U); + DCHECK_LE(To - From, RegionSize); + + if (!ensurePageMapAllocated()) + return false; + + uptr FromInRegion = From - Base; + uptr ToInRegion = To - Base; + uptr FirstBlockInRange = roundUpSlow(FromInRegion, BlockSize); + + // The straddling block sits across entire range. 
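+ // I.e. no block starts inside [From, To), so there is nothing for this + // range to mark. E.g. (illustrative) BlockSize == 8192 with a single + // 4 KiB page in the range: the page lies entirely within one block and + // the check below returns early.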
+ if (FirstBlockInRange >= ToInRegion) + return true; + + // First block may not sit at the first pape in the range, move + // `FromInRegion` to the first block page. + FromInRegion = roundDown(FirstBlockInRange, PageSize); + + // When The first block is not aligned to the range boundary, which means + // there is a block sitting acorss `From`, that looks like, + // + // From To + // V V + // +-----------------------------------------------+ + // +-----+-----+-----+-----+ + // | | | | | ... + // +-----+-----+-----+-----+ + // |- first page -||- second page -||- ... + // + // Therefore, we can't just mark the first page as all counted. Instead, we + // increment the number of blocks in the first page in the page map and + // then round up the `From` to the next page. + if (FirstBlockInRange != FromInRegion) { + DCHECK_GT(FromInRegion + PageSize, FirstBlockInRange); + uptr NumBlocksInFirstPage = + (FromInRegion + PageSize - FirstBlockInRange + BlockSize - 1) / + BlockSize; + PageMap.incN(RegionIndex, getPageIndex(FromInRegion), + NumBlocksInFirstPage); + FromInRegion = roundUp(FromInRegion + 1, PageSize); + } + + uptr LastBlockInRange = roundDownSlow(ToInRegion - 1, BlockSize); + + // Note that LastBlockInRange may be smaller than `FromInRegion` at this + // point because it may contain only one block in the range. + + // When the last block sits across `To`, we can't just mark the pages + // occupied by the last block as all counted. Instead, we increment the + // counters of those pages by 1. The exception is that if it's the last + // block in the region, it's fine to mark those pages as all counted. + if (LastBlockInRange + BlockSize != RegionSize) { + DCHECK_EQ(ToInRegion % PageSize, 0U); + // The case below is like, + // + // From To + // V V + // +----------------------------------------+ + // +-----+-----+-----+-----+ + // | | | | | ... + // +-----+-----+-----+-----+ + // ... -||- last page -||- next page -| + // + // The last block is not aligned to `To`, we need to increment the + // counter of `next page` by 1. + if (LastBlockInRange + BlockSize != ToInRegion) { + PageMap.incRange(RegionIndex, getPageIndex(ToInRegion), + getPageIndex(LastBlockInRange + BlockSize - 1)); + } } else { - // One chunk covers multiple pages, Some chunks are crossing page - // boundaries. Some pages contain one chunk, some contain two. - FullPagesBlockCountMax = 2; - SameBlockCountPerPage = false; + ToInRegion = RegionSize; + } + + // After handling the first page and the last block, it's safe to mark any + // page in between the range [From, To). + if (FromInRegion < ToInRegion) { + PageMap.setAsAllCountedRange(RegionIndex, getPageIndex(FromInRegion), + getPageIndex(ToInRegion - 1)); } + + return true; } - const uptr PagesCount = roundUpTo(RegionSize, PageSize) / PageSize; - PackedCounterArray Counters(NumberOfRegions, PagesCount, - FullPagesBlockCountMax); - if (!Counters.isAllocated()) - return; - - const uptr PageSizeLog = getLog2(PageSize); - const uptr RoundedRegionSize = PagesCount << PageSizeLog; - const uptr RoundedSize = NumberOfRegions * RoundedRegionSize; - - // Iterate over free chunks and count how many free chunks affect each - // allocated page. - if (BlockSize <= PageSize && PageSize % BlockSize == 0) { - // Each chunk affects one page only. - for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 
0 : P / RegionSize; - const uptr PInRegion = P - RegionIndex * RegionSize; - Counters.inc(RegionIndex, PInRegion >> PageSizeLog); + template + bool markFreeBlocksInRegion(const IntrusiveList &FreeList, + DecompactPtrT DecompactPtr, const uptr Base, + const uptr RegionIndex, const uptr RegionSize, + bool MayContainLastBlockInRegion) { + if (!ensurePageMapAllocated()) + return false; + + if (MayContainLastBlockInRegion) { + const uptr LastBlockInRegion = + ((RegionSize / BlockSize) - 1U) * BlockSize; + // The last block in a region may not use the entire page, we mark the + // following "pretend" memory block(s) as free in advance. + // + // Region Boundary + // v + // -----+-----------------------+ + // | Last Page | <- Rounded Region Boundary + // -----+-----------------------+ + // |-----||- trailing blocks -| + // ^ + // last block + const uptr RoundedRegionSize = roundUp(RegionSize, PageSize); + const uptr TrailingBlockBase = LastBlockInRegion + BlockSize; + // If the difference between `RoundedRegionSize` and + // `TrailingBlockBase` is larger than a page, that implies the reported + // `RegionSize` may not be accurate. + DCHECK_LT(RoundedRegionSize - TrailingBlockBase, PageSize); + + // Only the last page touched by the last block needs to mark the trailing + // blocks. Note that if the last "pretend" block straddles the boundary, + // we still have to count it in so that the logic of counting the number + // of blocks on a page is consistent. + uptr NumTrailingBlocks = + (roundUpSlow(RoundedRegionSize - TrailingBlockBase, BlockSize) + + BlockSize - 1) / + BlockSize; + if (NumTrailingBlocks > 0) { + PageMap.incN(RegionIndex, getPageIndex(TrailingBlockBase), + NumTrailingBlocks); } } - } else { - // In all other cases chunks might affect more than one page. - DCHECK_GE(RegionSize, BlockSize); - const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize; - for (const auto &It : FreeList) { - for (u32 I = 0; I < It.getCount(); I++) { - const uptr P = DecompactPtr(It.get(I)) - Recorder->getBase(); - if (P >= RoundedSize) - continue; - const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize; - uptr PInRegion = P - RegionIndex * RegionSize; - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - // The last block in a region might straddle a page, so if it's - // free, we mark the following "pretend" memory block(s) as free. - if (PInRegion == LastBlockInRegion) { - PInRegion += BlockSize; - while (PInRegion < RoundedRegionSize) { - Counters.incRange(RegionIndex, PInRegion >> PageSizeLog, - (PInRegion + BlockSize - 1) >> PageSizeLog); - PInRegion += BlockSize; - } + + // Iterate over free chunks and count how many free chunks affect each + // allocated page. + if (BlockSize <= PageSize && PageSize % BlockSize == 0) { + // Each chunk affects one page only. + for (const auto &It : FreeList) { + for (u16 I = 0; I < It.getCount(); I++) { + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; + DCHECK_LT(PInRegion, RegionSize); + PageMap.inc(RegionIndex, getPageIndex(PInRegion)); + } + } + } else { + // In all other cases chunks might affect more than one page. 
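+ // E.g. (illustrative, 4 KiB pages, no release offset): a free 16 KiB + // block at region offset 0x6000 spans offsets 0x6000-0x9fff, so the + // incRange() below bumps the counters of pages 6 through 9.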
+ DCHECK_GE(RegionSize, BlockSize); + for (const auto &It : FreeList) { + for (u16 I = 0; I < It.getCount(); I++) { + const uptr PInRegion = DecompactPtr(It.get(I)) - Base; + PageMap.incRange(RegionIndex, getPageIndex(PInRegion), + getPageIndex(PInRegion + BlockSize - 1)); } } } + + return true; } + uptr getPageIndex(uptr P) { return (P >> PageSizeLog) - ReleasePageOffset; } + uptr getReleaseOffset() { return ReleasePageOffset << PageSizeLog; } + + uptr BlockSize; + uptr NumberOfRegions; + // For partial region marking, some pages in front are not needed to be + // counted. + uptr ReleasePageOffset; + uptr PageSize; + uptr PagesCount; + uptr PageSizeLog; + uptr FullPagesBlockCountMax; + bool SameBlockCountPerPage; + RegionPageMap PageMap; +}; + +// Try to release the page which doesn't have any in-used block, i.e., they are +// all free blocks. The `PageMap` will record the number of free blocks in each +// page. +template +NOINLINE void +releaseFreeMemoryToOS(PageReleaseContext &Context, + ReleaseRecorderT &Recorder, SkipRegionT SkipRegion) { + const uptr PageSize = Context.PageSize; + const uptr BlockSize = Context.BlockSize; + const uptr PagesCount = Context.PagesCount; + const uptr NumberOfRegions = Context.NumberOfRegions; + const uptr ReleasePageOffset = Context.ReleasePageOffset; + const uptr FullPagesBlockCountMax = Context.FullPagesBlockCountMax; + const bool SameBlockCountPerPage = Context.SameBlockCountPerPage; + RegionPageMap &PageMap = Context.PageMap; + // Iterate over pages detecting ranges of pages with chunk Counters equal // to the expected number of chunks for the particular page. FreePagesRangeTracker RangeTracker(Recorder); @@ -287,9 +647,11 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, RangeTracker.skipPages(PagesCount); continue; } - for (uptr J = 0; J < PagesCount; J++) - RangeTracker.processNextPage(Counters.get(I, J) == - FullPagesBlockCountMax); + for (uptr J = 0; J < PagesCount; J++) { + const bool CanRelease = + PageMap.updateAsAllCountedIf(I, J, FullPagesBlockCountMax); + RangeTracker.processNextPage(CanRelease); + } } } else { // Slow path, go through the pages keeping count how many chunks affect @@ -308,6 +670,10 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, } uptr PrevPageBoundary = 0; uptr CurrentBoundary = 0; + if (ReleasePageOffset > 0) { + PrevPageBoundary = ReleasePageOffset * PageSize; + CurrentBoundary = roundUpSlow(PrevPageBoundary, BlockSize); + } for (uptr J = 0; J < PagesCount; J++) { const uptr PageBoundary = PrevPageBoundary + PageSize; uptr BlocksPerPage = Pn; @@ -321,7 +687,9 @@ releaseFreeMemoryToOS(const IntrusiveList &FreeList, } } PrevPageBoundary = PageBoundary; - RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage); + const bool CanRelease = + PageMap.updateAsAllCountedIf(I, J, BlocksPerPage); + RangeTracker.processNextPage(CanRelease); } } } diff --git a/third_party/llvm-scudo-standalone/report.cpp b/third_party/llvm-scudo-standalone/report.cpp index 561c7c5..9cef0ad 100644 --- a/third_party/llvm-scudo-standalone/report.cpp +++ b/third_party/llvm-scudo-standalone/report.cpp @@ -21,14 +21,10 @@ class ScopedErrorReport { void append(const char *Format, ...) 
{ va_list Args; va_start(Args, Format); - Message.append(Format, Args); + Message.vappend(Format, Args); va_end(Args); } - NORETURN ~ScopedErrorReport() { - outputRaw(Message.data()); - setAbortMessage(Message.data()); - die(); - } + NORETURN ~ScopedErrorReport() { reportRawError(Message.data()); } private: ScopedString Message; @@ -55,6 +51,13 @@ void NORETURN reportError(const char *Message) { Report.append("%s\n", Message); } +// Generic fatal error message without ScopedString. +void NORETURN reportRawError(const char *Message) { + outputRaw(Message); + setAbortMessage(Message); + die(); +} + void NORETURN reportInvalidFlag(const char *FlagType, const char *Value) { ScopedErrorReport Report; Report.append("invalid value for %s option: '%s'\n", FlagType, Value); @@ -67,14 +70,6 @@ void NORETURN reportHeaderCorruption(void *Ptr) { Report.append("corrupted chunk header at address %p\n", Ptr); } -// Two threads have attempted to modify a chunk header at the same time. This is -// symptomatic of a race-condition in the application code, or general lack of -// proper locking. -void NORETURN reportHeaderRace(void *Ptr) { - ScopedErrorReport Report; - Report.append("race on chunk header at address %p\n", Ptr); -} - // The allocator was compiled with parameters that conflict with field size // requirements. void NORETURN reportSanityCheckError(const char *Field) { @@ -100,6 +95,11 @@ void NORETURN reportAllocationSizeTooBig(uptr UserSize, uptr TotalSize, UserSize, TotalSize, MaxSize); } +void NORETURN reportOutOfBatchClass() { + ScopedErrorReport Report; + Report.append("BatchClass region is used up, can't hold any free block\n"); +} + void NORETURN reportOutOfMemory(uptr RequestedSize) { ScopedErrorReport Report; Report.append("out of memory trying to allocate %zu bytes\n", RequestedSize); diff --git a/third_party/llvm-scudo-standalone/report.h b/third_party/llvm-scudo-standalone/report.h index 14e4e79..a510fda 100644 --- a/third_party/llvm-scudo-standalone/report.h +++ b/third_party/llvm-scudo-standalone/report.h @@ -15,15 +15,17 @@ namespace scudo { // Reports are *fatal* unless stated otherwise. -// Generic error. +// Generic error, adds newline to end of message. void NORETURN reportError(const char *Message); +// Generic error, but the message is not modified. +void NORETURN reportRawError(const char *Message); + // Flags related errors. void NORETURN reportInvalidFlag(const char *FlagType, const char *Value); // Chunk header related errors. void NORETURN reportHeaderCorruption(void *Ptr); -void NORETURN reportHeaderRace(void *Ptr); // Sanity checks related error. void NORETURN reportSanityCheckError(const char *Field); @@ -32,6 +34,7 @@ void NORETURN reportSanityCheckError(const char *Field); void NORETURN reportAlignmentTooBig(uptr Alignment, uptr MaxAlignment); void NORETURN reportAllocationSizeTooBig(uptr UserSize, uptr TotalSize, uptr MaxSize); +void NORETURN reportOutOfBatchClass(); void NORETURN reportOutOfMemory(uptr RequestedSize); enum class AllocatorAction : u8 { Recycling, diff --git a/third_party/llvm-scudo-standalone/report_linux.cpp b/third_party/llvm-scudo-standalone/report_linux.cpp new file mode 100644 index 0000000..6a98303 --- /dev/null +++ b/third_party/llvm-scudo-standalone/report_linux.cpp @@ -0,0 +1,58 @@ +//===-- report_linux.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "common.h" +#include "internal_defs.h" +#include "report.h" +#include "report_linux.h" +#include "string_utils.h" + +#include +#include +#include + +namespace scudo { + +// Fatal internal map() error (potentially OOM related). +void NORETURN reportMapError(uptr SizeIfOOM) { + char Error[128] = "Scudo ERROR: internal map failure\n"; + if (SizeIfOOM) { + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal map failure (NO MEMORY) requesting %zuKB\n", + SizeIfOOM >> 10); + } + reportRawError(Error); +} + +void NORETURN reportUnmapError(uptr Addr, uptr Size) { + char Error[128]; + formatString(Error, sizeof(Error), + "Scudo ERROR: internal unmap failure (error desc=%s) Addr 0x%zx " + "Size %zu\n", + strerror(errno), Addr, Size); + reportRawError(Error); +} + +void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot) { + char Error[128]; + formatString( + Error, sizeof(Error), + "Scudo ERROR: internal protect failure (error desc=%s) Addr 0x%zx " + "Size %zu Prot %x\n", + strerror(errno), Addr, Size, Prot); + reportRawError(Error); +} + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY diff --git a/third_party/llvm-scudo-standalone/report_linux.h b/third_party/llvm-scudo-standalone/report_linux.h new file mode 100644 index 0000000..aa0bb24 --- /dev/null +++ b/third_party/llvm-scudo-standalone/report_linux.h @@ -0,0 +1,34 @@ +//===-- report_linux.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_REPORT_LINUX_H_ +#define SCUDO_REPORT_LINUX_H_ + +#include "platform.h" + +#if SCUDO_LINUX || SCUDO_TRUSTY + +#include "internal_defs.h" + +namespace scudo { + +// Report a fatal error when a map call fails. SizeIfOOM shall +// hold the requested size on an out-of-memory error, 0 otherwise. +void NORETURN reportMapError(uptr SizeIfOOM = 0); + +// Report a fatal error when an unmap call fails. +void NORETURN reportUnmapError(uptr Addr, uptr Size); + +// Report a fatal error when a mprotect call fails. 
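+// The emitted message follows the format used in report_linux.cpp, e.g. +// "Scudo ERROR: internal protect failure (error desc=%s) Addr 0x%zx " +// "Size %zu Prot %x".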
+void NORETURN reportProtectError(uptr Addr, uptr Size, int Prot); + +} // namespace scudo + +#endif // SCUDO_LINUX || SCUDO_TRUSTY + +#endif // SCUDO_REPORT_LINUX_H_ diff --git a/third_party/llvm-scudo-standalone/secondary.h b/third_party/llvm-scudo-standalone/secondary.h index 630e64d..f52a418 100644 --- a/third_party/llvm-scudo-standalone/secondary.h +++ b/third_party/llvm-scudo-standalone/secondary.h @@ -12,11 +12,13 @@ #include "chunk.h" #include "common.h" #include "list.h" +#include "mem_map.h" #include "memtag.h" #include "mutex.h" #include "options.h" #include "stats.h" #include "string_utils.h" +#include "thread_annotations.h" namespace scudo { @@ -36,9 +38,7 @@ struct alignas(Max(archSupportsMemoryTagging() LargeBlock::Header *Next; uptr CommitBase; uptr CommitSize; - uptr MapBase; - uptr MapSize; - [[no_unique_address]] MapPlatformData Data; + MemMapT MemMap; }; static_assert(sizeof(Header) % (1U << SCUDO_MIN_ALIGNMENT_LOG) == 0, ""); @@ -64,16 +64,34 @@ template static Header *getHeader(const void *Ptr) { } // namespace LargeBlock -static void unmap(LargeBlock::Header *H) { - MapPlatformData Data = H->Data; - unmap(reinterpret_cast(H->MapBase), H->MapSize, UNMAP_ALL, &Data); +static inline void unmap(LargeBlock::Header *H) { + // Note that the `H->MapMap` is stored on the pages managed by itself. Take + // over the ownership before unmap() so that any operation along with unmap() + // won't touch inaccessible pages. + MemMapT MemMap = H->MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); } -class MapAllocatorNoCache { +namespace { +struct CachedBlock { + uptr CommitBase = 0; + uptr CommitSize = 0; + uptr BlockBegin = 0; + MemMapT MemMap = {}; + u64 Time = 0; + + bool isValid() { return CommitBase != 0; } + + void invalidate() { CommitBase = 0; } +}; +} // namespace + +template class MapAllocatorNoCache { public: void init(UNUSED s32 ReleaseToOsInterval) {} bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment, - UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) { + UNUSED uptr HeadersSize, UNUSED LargeBlock::Header **H, + UNUSED bool *Zeroed) { return false; } void store(UNUSED Options Options, LargeBlock::Header *H) { unmap(H); } @@ -90,61 +108,102 @@ class MapAllocatorNoCache { // Not supported by the Secondary Cache, but not an error either. 
return true; } + + void getStats(UNUSED ScopedString *Str) { + Str->append("Secondary Cache Disabled\n"); + } }; static const uptr MaxUnusedCachePages = 4U; template -void mapSecondary(Options Options, uptr CommitBase, uptr CommitSize, - uptr AllocPos, uptr Flags, MapPlatformData *Data) { +bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize, + uptr AllocPos, uptr Flags, MemMapT &MemMap) { + Flags |= MAP_RESIZABLE; + Flags |= MAP_ALLOWNOMEM; + const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * getPageSizeCached(); if (useMemoryTagging(Options) && CommitSize > MaxUnusedCacheBytes) { const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes); - map(reinterpret_cast(CommitBase), UntaggedPos - CommitBase, - "scudo:secondary", MAP_RESIZABLE | MAP_MEMTAG | Flags, Data); - map(reinterpret_cast(UntaggedPos), - CommitBase + CommitSize - UntaggedPos, "scudo:secondary", - MAP_RESIZABLE | Flags, Data); + return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary", + MAP_MEMTAG | Flags) && + MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos, + "scudo:secondary", Flags); } else { - map(reinterpret_cast(CommitBase), CommitSize, "scudo:secondary", - MAP_RESIZABLE | (useMemoryTagging(Options) ? MAP_MEMTAG : 0) | - Flags, - Data); + const uptr RemapFlags = + (useMemoryTagging(Options) ? MAP_MEMTAG : 0) | Flags; + return MemMap.remap(CommitBase, CommitSize, "scudo:secondary", RemapFlags); } } +// Template specialization to avoid producing zero-length array +template class NonZeroLengthArray { +public: + T &operator[](uptr Idx) { return values[Idx]; } + +private: + T values[Size]; +}; +template class NonZeroLengthArray { +public: + T &operator[](uptr UNUSED Idx) { UNREACHABLE("Unsupported!"); } +}; + template class MapAllocatorCache { public: + using CacheConfig = typename Config::Secondary::Cache; + + void getStats(ScopedString *Str) { + ScopedLock L(Mutex); + uptr Integral; + uptr Fractional; + computePercentage(SuccessfulRetrieves, CallsToRetrieve, &Integral, + &Fractional); + Str->append("Stats: MapAllocatorCache: EntriesCount: %d, " + "MaxEntriesCount: %u, MaxEntrySize: %zu\n", + EntriesCount, atomic_load_relaxed(&MaxEntriesCount), + atomic_load_relaxed(&MaxEntrySize)); + Str->append("Stats: CacheRetrievalStats: SuccessRate: %u/%u " + "(%zu.%02zu%%)\n", + SuccessfulRetrieves, CallsToRetrieve, Integral, Fractional); + for (CachedBlock Entry : Entries) { + if (!Entry.isValid()) + continue; + Str->append("StartBlockAddress: 0x%zx, EndBlockAddress: 0x%zx, " + "BlockSize: %zu %s\n", + Entry.CommitBase, Entry.CommitBase + Entry.CommitSize, + Entry.CommitSize, Entry.Time == 0 ? "[R]" : ""); + } + } + // Ensure the default maximum specified fits the array. 
- static_assert(Config::SecondaryCacheDefaultMaxEntriesCount <= - Config::SecondaryCacheEntriesArraySize, + static_assert(CacheConfig::DefaultMaxEntriesCount <= + CacheConfig::EntriesArraySize, ""); - void init(s32 ReleaseToOsInterval) { + void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(EntriesCount, 0U); setOption(Option::MaxCacheEntriesCount, - static_cast(Config::SecondaryCacheDefaultMaxEntriesCount)); + static_cast(CacheConfig::DefaultMaxEntriesCount)); setOption(Option::MaxCacheEntrySize, - static_cast(Config::SecondaryCacheDefaultMaxEntrySize)); + static_cast(CacheConfig::DefaultMaxEntrySize)); setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void store(Options Options, LargeBlock::Header *H) { + void store(const Options &Options, LargeBlock::Header *H) EXCLUDES(Mutex) { if (!canCache(H->CommitSize)) return unmap(H); bool EntryCached = false; bool EmptyCache = false; const s32 Interval = atomic_load_relaxed(&ReleaseToOsIntervalMs); - const u64 Time = getMonotonicTime(); + const u64 Time = getMonotonicTimeFast(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); CachedBlock Entry; Entry.CommitBase = H->CommitBase; Entry.CommitSize = H->CommitSize; - Entry.MapBase = H->MapBase; - Entry.MapSize = H->MapSize; Entry.BlockBegin = reinterpret_cast(H + 1); - Entry.Data = H->Data; + Entry.MemMap = H->MemMap; Entry.Time = Time; if (useMemoryTagging(Options)) { if (Interval == 0 && !SCUDO_FUCHSIA) { @@ -154,13 +213,13 @@ template class MapAllocatorCache { // on top so we just do the two syscalls there. Entry.Time = 0; mapSecondary(Options, Entry.CommitBase, Entry.CommitSize, - Entry.CommitBase, MAP_NOACCESS, &Entry.Data); + Entry.CommitBase, MAP_NOACCESS, Entry.MemMap); } else { - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, MAP_NOACCESS, - &Entry.Data); + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, + MAP_NOACCESS); } } else if (Interval == 0) { - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } do { @@ -172,11 +231,10 @@ template class MapAllocatorCache { // just unmap it. 
break; } - if (Config::SecondaryCacheQuarantineSize && - useMemoryTagging(Options)) { + if (CacheConfig::QuarantineSize && useMemoryTagging(Options)) { QuarantinePos = - (QuarantinePos + 1) % Max(Config::SecondaryCacheQuarantineSize, 1u); - if (!Quarantine[QuarantinePos].CommitBase) { + (QuarantinePos + 1) % Max(CacheConfig::QuarantineSize, 1u); + if (!Quarantine[QuarantinePos].isValid()) { Quarantine[QuarantinePos] = Entry; return; } @@ -191,7 +249,7 @@ template class MapAllocatorCache { EmptyCache = true; } else { for (u32 I = 0; I < MaxCount; I++) { - if (Entries[I].CommitBase) + if (Entries[I].isValid()) continue; if (I != 0) Entries[I] = Entries[0]; @@ -209,66 +267,89 @@ template class MapAllocatorCache { else if (Interval >= 0) releaseOlderThan(Time - static_cast(Interval) * 1000000); if (!EntryCached) - unmap(reinterpret_cast(Entry.MapBase), Entry.MapSize, UNMAP_ALL, - &Entry.Data); + Entry.MemMap.unmap(Entry.MemMap.getBase(), Entry.MemMap.getCapacity()); } - bool retrieve(Options Options, uptr Size, uptr Alignment, - LargeBlock::Header **H, bool *Zeroed) { + bool retrieve(Options Options, uptr Size, uptr Alignment, uptr HeadersSize, + LargeBlock::Header **H, bool *Zeroed) EXCLUDES(Mutex) { const uptr PageSize = getPageSizeCached(); const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount); + // 10% of the requested size proved to be the optimal choice for + // retrieving cached blocks after testing several options. + constexpr u32 FragmentedBytesDivisor = 10; bool Found = false; CachedBlock Entry; - uptr HeaderPos; + uptr EntryHeaderPos = 0; { ScopedLock L(Mutex); + CallsToRetrieve++; if (EntriesCount == 0) return false; + u32 OptimalFitIndex = 0; + uptr MinDiff = UINTPTR_MAX; for (u32 I = 0; I < MaxCount; I++) { - const uptr CommitBase = Entries[I].CommitBase; - if (!CommitBase) + if (!Entries[I].isValid()) continue; + const uptr CommitBase = Entries[I].CommitBase; const uptr CommitSize = Entries[I].CommitSize; const uptr AllocPos = - roundDownTo(CommitBase + CommitSize - Size, Alignment); - HeaderPos = - AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + roundDown(CommitBase + CommitSize - Size, Alignment); + const uptr HeaderPos = AllocPos - HeadersSize; if (HeaderPos > CommitBase + CommitSize) continue; if (HeaderPos < CommitBase || - AllocPos > CommitBase + PageSize * MaxUnusedCachePages) + AllocPos > CommitBase + PageSize * MaxUnusedCachePages) { continue; + } Found = true; - Entry = Entries[I]; - Entries[I].CommitBase = 0; - break; + const uptr Diff = HeaderPos - CommitBase; + // immediately use a cached block if it's size is close enough to the + // requested size. 
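+ // E.g. (illustrative): for a cached 1 MiB mapping where the block header + // would start 64 KiB above CommitBase, the 64 KiB gap is below 10% of the + // remaining ~960 KiB, so the entry is taken right away; larger gaps fall + // through to the best-fit scan below.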
+ const uptr MaxAllowedFragmentedBytes = + (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor; + if (Diff <= MaxAllowedFragmentedBytes) { + OptimalFitIndex = I; + EntryHeaderPos = HeaderPos; + break; + } + // keep track of the smallest cached block + // that is greater than (AllocSize + HeaderSize) + if (Diff > MinDiff) + continue; + OptimalFitIndex = I; + MinDiff = Diff; + EntryHeaderPos = HeaderPos; + } + if (Found) { + Entry = Entries[OptimalFitIndex]; + Entries[OptimalFitIndex].invalidate(); + EntriesCount--; + SuccessfulRetrieves++; } } - if (Found) { - *H = reinterpret_cast( - LargeBlock::addHeaderTag(HeaderPos)); - *Zeroed = Entry.Time == 0; - if (useMemoryTagging(Options)) - setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0, &Entry.Data); - uptr NewBlockBegin = reinterpret_cast(*H + 1); - if (useMemoryTagging(Options)) { - if (*Zeroed) - storeTags(LargeBlock::addHeaderTag(Entry.CommitBase), - NewBlockBegin); - else if (Entry.BlockBegin < NewBlockBegin) - storeTags(Entry.BlockBegin, NewBlockBegin); - else - storeTags(untagPointer(NewBlockBegin), - untagPointer(Entry.BlockBegin)); + if (!Found) + return false; + + *H = reinterpret_cast( + LargeBlock::addHeaderTag(EntryHeaderPos)); + *Zeroed = Entry.Time == 0; + if (useMemoryTagging(Options)) + Entry.MemMap.setMemoryPermission(Entry.CommitBase, Entry.CommitSize, 0); + uptr NewBlockBegin = reinterpret_cast(*H + 1); + if (useMemoryTagging(Options)) { + if (*Zeroed) { + storeTags(LargeBlock::addHeaderTag(Entry.CommitBase), + NewBlockBegin); + } else if (Entry.BlockBegin < NewBlockBegin) { + storeTags(Entry.BlockBegin, NewBlockBegin); + } else { + storeTags(untagPointer(NewBlockBegin), untagPointer(Entry.BlockBegin)); } - (*H)->CommitBase = Entry.CommitBase; - (*H)->CommitSize = Entry.CommitSize; - (*H)->MapBase = Entry.MapBase; - (*H)->MapSize = Entry.MapSize; - (*H)->Data = Entry.Data; - EntriesCount--; } - return Found; + (*H)->CommitBase = Entry.CommitBase; + (*H)->CommitSize = Entry.CommitSize; + (*H)->MemMap = Entry.MemMap; + return true; } bool canCache(uptr Size) { @@ -278,16 +359,15 @@ template class MapAllocatorCache { bool setOption(Option O, sptr Value) { if (O == Option::ReleaseInterval) { - const s32 Interval = - Max(Min(static_cast(Value), - Config::SecondaryCacheMaxReleaseToOsIntervalMs), - Config::SecondaryCacheMinReleaseToOsIntervalMs); + const s32 Interval = Max( + Min(static_cast(Value), CacheConfig::MaxReleaseToOsIntervalMs), + CacheConfig::MinReleaseToOsIntervalMs); atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval); return true; } if (O == Option::MaxCacheEntriesCount) { const u32 MaxCount = static_cast(Value); - if (MaxCount > Config::SecondaryCacheEntriesArraySize) + if (MaxCount > CacheConfig::EntriesArraySize) return false; atomic_store_relaxed(&MaxEntriesCount, MaxCount); return true; @@ -302,105 +382,96 @@ template class MapAllocatorCache { void releaseToOS() { releaseOlderThan(UINT64_MAX); } - void disableMemoryTagging() { + void disableMemoryTagging() EXCLUDES(Mutex) { ScopedLock L(Mutex); - for (u32 I = 0; I != Config::SecondaryCacheQuarantineSize; ++I) { - if (Quarantine[I].CommitBase) { - unmap(reinterpret_cast(Quarantine[I].MapBase), - Quarantine[I].MapSize, UNMAP_ALL, &Quarantine[I].Data); - Quarantine[I].CommitBase = 0; + for (u32 I = 0; I != CacheConfig::QuarantineSize; ++I) { + if (Quarantine[I].isValid()) { + MemMapT &MemMap = Quarantine[I].MemMap; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + Quarantine[I].invalidate(); } } const u32 MaxCount = 
atomic_load_relaxed(&MaxEntriesCount); - for (u32 I = 0; I < MaxCount; I++) - if (Entries[I].CommitBase) - setMemoryPermission(Entries[I].CommitBase, Entries[I].CommitSize, 0, - &Entries[I].Data); + for (u32 I = 0; I < MaxCount; I++) { + if (Entries[I].isValid()) { + Entries[I].MemMap.setMemoryPermission(Entries[I].CommitBase, + Entries[I].CommitSize, 0); + } + } QuarantinePos = -1U; } - void disable() { Mutex.lock(); } + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); } - void enable() { Mutex.unlock(); } + void enable() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); } void unmapTestOnly() { empty(); } private: void empty() { - struct { - void *MapBase; - uptr MapSize; - MapPlatformData Data; - } MapInfo[Config::SecondaryCacheEntriesArraySize]; + MemMapT MapInfo[CacheConfig::EntriesArraySize]; uptr N = 0; { ScopedLock L(Mutex); - for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) { - if (!Entries[I].CommitBase) + for (uptr I = 0; I < CacheConfig::EntriesArraySize; I++) { + if (!Entries[I].isValid()) continue; - MapInfo[N].MapBase = reinterpret_cast(Entries[I].MapBase); - MapInfo[N].MapSize = Entries[I].MapSize; - MapInfo[N].Data = Entries[I].Data; - Entries[I].CommitBase = 0; + MapInfo[N] = Entries[I].MemMap; + Entries[I].invalidate(); N++; } EntriesCount = 0; IsFullEvents = 0; } - for (uptr I = 0; I < N; I++) - unmap(MapInfo[I].MapBase, MapInfo[I].MapSize, UNMAP_ALL, - &MapInfo[I].Data); + for (uptr I = 0; I < N; I++) { + MemMapT &MemMap = MapInfo[I]; + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } } - struct CachedBlock { - uptr CommitBase; - uptr CommitSize; - uptr MapBase; - uptr MapSize; - uptr BlockBegin; - [[no_unique_address]] MapPlatformData Data; - u64 Time; - }; - - void releaseIfOlderThan(CachedBlock &Entry, u64 Time) { - if (!Entry.CommitBase || !Entry.Time) + void releaseIfOlderThan(CachedBlock &Entry, u64 Time) REQUIRES(Mutex) { + if (!Entry.isValid() || !Entry.Time) return; if (Entry.Time > Time) { if (OldestTime == 0 || Entry.Time < OldestTime) OldestTime = Entry.Time; return; } - releasePagesToOS(Entry.CommitBase, 0, Entry.CommitSize, &Entry.Data); + Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, Entry.CommitSize); Entry.Time = 0; } - void releaseOlderThan(u64 Time) { + void releaseOlderThan(u64 Time) EXCLUDES(Mutex) { ScopedLock L(Mutex); if (!EntriesCount || OldestTime == 0 || OldestTime > Time) return; OldestTime = 0; - for (uptr I = 0; I < Config::SecondaryCacheQuarantineSize; I++) + for (uptr I = 0; I < CacheConfig::QuarantineSize; I++) releaseIfOlderThan(Quarantine[I], Time); - for (uptr I = 0; I < Config::SecondaryCacheEntriesArraySize; I++) + for (uptr I = 0; I < CacheConfig::EntriesArraySize; I++) releaseIfOlderThan(Entries[I], Time); } HybridMutex Mutex; - u32 EntriesCount = 0; - u32 QuarantinePos = 0; + u32 EntriesCount GUARDED_BY(Mutex) = 0; + u32 QuarantinePos GUARDED_BY(Mutex) = 0; atomic_u32 MaxEntriesCount = {}; atomic_uptr MaxEntrySize = {}; - u64 OldestTime = 0; - u32 IsFullEvents = 0; + u64 OldestTime GUARDED_BY(Mutex) = 0; + u32 IsFullEvents GUARDED_BY(Mutex) = 0; atomic_s32 ReleaseToOsIntervalMs = {}; + u32 CallsToRetrieve GUARDED_BY(Mutex) = 0; + u32 SuccessfulRetrieves GUARDED_BY(Mutex) = 0; - CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {}; - CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize] = {}; + CachedBlock Entries[CacheConfig::EntriesArraySize] GUARDED_BY(Mutex) = {}; + NonZeroLengthArray + Quarantine GUARDED_BY(Mutex) = {}; }; template class MapAllocator { public: - void 
init(GlobalStats *S, s32 ReleaseToOsInterval = -1) { + void init(GlobalStats *S, + s32 ReleaseToOsInterval = -1) NO_THREAD_SAFETY_ANALYSIS { DCHECK_EQ(AllocatedBytes, 0U); DCHECK_EQ(FreedBytes, 0U); Cache.init(ReleaseToOsInterval); @@ -409,11 +480,11 @@ template class MapAllocator { S->link(&Stats); } - void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0, + void *allocate(const Options &Options, uptr Size, uptr AlignmentHint = 0, uptr *BlockEnd = nullptr, FillContentsMode FillContents = NoFill); - void deallocate(Options Options, void *Ptr); + void deallocate(const Options &Options, void *Ptr); static uptr getBlockEnd(void *Ptr) { auto *B = LargeBlock::getHeader(Ptr); @@ -424,19 +495,23 @@ template class MapAllocator { return getBlockEnd(Ptr) - reinterpret_cast(Ptr); } - void getStats(ScopedString *Str) const; + static constexpr uptr getHeadersSize() { + return Chunk::getHeaderSize() + LargeBlock::getHeaderSize(); + } - void disable() { + void disable() NO_THREAD_SAFETY_ANALYSIS { Mutex.lock(); Cache.disable(); } - void enable() { + void enable() NO_THREAD_SAFETY_ANALYSIS { Cache.enable(); Mutex.unlock(); } template void iterateOverBlocks(F Callback) const { + Mutex.assertHeld(); + for (const auto &H : InUseBlocks) { uptr Ptr = reinterpret_cast(&H) + LargeBlock::getHeaderSize(); if (allocatorSupportsMemoryTagging()) @@ -445,7 +520,7 @@ template class MapAllocator { } } - uptr canCache(uptr Size) { return Cache.canCache(Size); } + bool canCache(uptr Size) { return Cache.canCache(Size); } bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); } @@ -455,17 +530,20 @@ template class MapAllocator { void unmapTestOnly() { Cache.unmapTestOnly(); } -private: - typename Config::SecondaryCache Cache; + void getStats(ScopedString *Str); - HybridMutex Mutex; - DoublyLinkedList InUseBlocks; - uptr AllocatedBytes = 0; - uptr FreedBytes = 0; - uptr LargestSize = 0; - u32 NumberOfAllocs = 0; - u32 NumberOfFrees = 0; - LocalStats Stats; +private: + typename Config::Secondary::template CacheT Cache; + + mutable HybridMutex Mutex; + DoublyLinkedList InUseBlocks GUARDED_BY(Mutex); + uptr AllocatedBytes GUARDED_BY(Mutex) = 0; + uptr FreedBytes GUARDED_BY(Mutex) = 0; + uptr FragmentedBytes GUARDED_BY(Mutex) = 0; + uptr LargestSize GUARDED_BY(Mutex) = 0; + u32 NumberOfAllocs GUARDED_BY(Mutex) = 0; + u32 NumberOfFrees GUARDED_BY(Mutex) = 0; + LocalStats Stats GUARDED_BY(Mutex); }; // As with the Primary, the size passed to this function includes any desired @@ -480,24 +558,23 @@ template class MapAllocator { // the committed memory will amount to something close to Size - AlignmentHint // (pending rounding and headers). template -void *MapAllocator::allocate(Options Options, uptr Size, uptr Alignment, - uptr *BlockEndPtr, +void *MapAllocator::allocate(const Options &Options, uptr Size, + uptr Alignment, uptr *BlockEndPtr, FillContentsMode FillContents) { if (Options.get(OptionBit::AddLargeAllocationSlack)) Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG; - Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG); + Alignment = Max(Alignment, uptr(1U) << SCUDO_MIN_ALIGNMENT_LOG); const uptr PageSize = getPageSizeCached(); - uptr RoundedSize = - roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() + - Chunk::getHeaderSize(), - PageSize); - if (Alignment > PageSize) - RoundedSize += Alignment - PageSize; - if (Alignment < PageSize && Cache.canCache(RoundedSize)) { + // Note that cached blocks may have aligned address already. 
Thus we simply + // pass the required size (`Size` + `getHeadersSize()`) to do cache look up. + const uptr MinNeededSizeForCache = roundUp(Size + getHeadersSize(), PageSize); + + if (Alignment < PageSize && Cache.canCache(MinNeededSizeForCache)) { LargeBlock::Header *H; bool Zeroed; - if (Cache.retrieve(Options, Size, Alignment, &H, &Zeroed)) { + if (Cache.retrieve(Options, Size, Alignment, getHeadersSize(), &H, + &Zeroed)) { const uptr BlockEnd = H->CommitBase + H->CommitSize; if (BlockEndPtr) *BlockEndPtr = BlockEnd; @@ -509,25 +586,35 @@ void *MapAllocator::allocate(Options Options, uptr Size, uptr Alignment, if (FillContents && !Zeroed) memset(Ptr, FillContents == ZeroFill ? 0 : PatternFillByte, BlockEnd - PtrInt); - const uptr BlockSize = BlockEnd - HInt; { ScopedLock L(Mutex); InUseBlocks.push_back(H); - AllocatedBytes += BlockSize; + AllocatedBytes += H->CommitSize; + FragmentedBytes += H->MemMap.getCapacity() - H->CommitSize; NumberOfAllocs++; - Stats.add(StatAllocated, BlockSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatAllocated, H->CommitSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return Ptr; } } - MapPlatformData Data = {}; + uptr RoundedSize = + roundUp(roundUp(Size, Alignment) + getHeadersSize(), PageSize); + if (Alignment > PageSize) + RoundedSize += Alignment - PageSize; + + ReservedMemoryT ReservedMemory; const uptr MapSize = RoundedSize + 2 * PageSize; - uptr MapBase = reinterpret_cast( - map(nullptr, MapSize, nullptr, MAP_NOACCESS | MAP_ALLOWNOMEM, &Data)); - if (UNLIKELY(!MapBase)) + if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, MapSize, nullptr, + MAP_ALLOWNOMEM))) { return nullptr; + } + + // Take the entire ownership of reserved region. + MemMapT MemMap = ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()); + uptr MapBase = MemMap.getBase(); uptr CommitBase = MapBase + PageSize; uptr MapEnd = MapBase + MapSize; @@ -537,77 +624,83 @@ void *MapAllocator::allocate(Options Options, uptr Size, uptr Alignment, // For alignments greater than or equal to a page, the user pointer (eg: the // pointer that is returned by the C or C++ allocation APIs) ends up on a // page boundary , and our headers will live in the preceding page. - CommitBase = roundUpTo(MapBase + PageSize + 1, Alignment) - PageSize; + CommitBase = roundUp(MapBase + PageSize + 1, Alignment) - PageSize; const uptr NewMapBase = CommitBase - PageSize; DCHECK_GE(NewMapBase, MapBase); // We only trim the extra memory on 32-bit platforms: 64-bit platforms // are less constrained memory wise, and that saves us two syscalls. 
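In the allocate() hunk above, the raw map()/unmap() calls that threaded a MapPlatformData through every call are replaced by the ReservedMemoryT/MemMapT abstraction. The sketch below is an editorial illustration written only against the method names and signatures visible in this patch (create, dispatch, remap, releasePagesToOS, unmap), so treat it as a summary of the lifecycle rather than canonical API documentation: reserve an inaccessible range, take ownership of it as a MemMapT, commit pages on demand, then release or unmap through the same object.

#include "common.h"
#include "mem_map.h"

// Reserve -> dispatch -> commit -> release/unmap, as used by the secondary
// allocator and the updated tests in this patch.
void reserveCommitReleaseSketch() {
  const scudo::uptr PageSize = scudo::getPageSizeCached();
  const scudo::uptr Size = 4 * PageSize;

  // 1. Reserve an address range; nothing in it is accessible yet.
  scudo::ReservedMemoryT ReservedMemory;
  if (!ReservedMemory.create(/*Addr=*/0U, Size, "scudo:sketch"))
    return;

  // 2. Take ownership of the whole reservation as a MemMapT.
  scudo::MemMapT MemMap = ReservedMemory.dispatch(ReservedMemory.getBase(),
                                                  ReservedMemory.getCapacity());

  // 3. Commit one page read-write so it can actually be used.
  if (MemMap.remap(MemMap.getBase(), PageSize, "scudo:sketch"))
    *reinterpret_cast<char *>(MemMap.getBase()) = 1;

  // 4. Hand the physical pages back, then drop the mapping entirely.
  MemMap.releasePagesToOS(MemMap.getBase(), PageSize);
  MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
}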
if (SCUDO_WORDSIZE == 32U && NewMapBase != MapBase) { - unmap(reinterpret_cast(MapBase), NewMapBase - MapBase, 0, &Data); + MemMap.unmap(MapBase, NewMapBase - MapBase); MapBase = NewMapBase; } const uptr NewMapEnd = - CommitBase + PageSize + roundUpTo(Size, PageSize) + PageSize; + CommitBase + PageSize + roundUp(Size, PageSize) + PageSize; DCHECK_LE(NewMapEnd, MapEnd); if (SCUDO_WORDSIZE == 32U && NewMapEnd != MapEnd) { - unmap(reinterpret_cast(NewMapEnd), MapEnd - NewMapEnd, 0, &Data); + MemMap.unmap(NewMapEnd, MapEnd - NewMapEnd); MapEnd = NewMapEnd; } } const uptr CommitSize = MapEnd - PageSize - CommitBase; - const uptr AllocPos = roundDownTo(CommitBase + CommitSize - Size, Alignment); - mapSecondary(Options, CommitBase, CommitSize, AllocPos, 0, &Data); - const uptr HeaderPos = - AllocPos - Chunk::getHeaderSize() - LargeBlock::getHeaderSize(); + const uptr AllocPos = roundDown(CommitBase + CommitSize - Size, Alignment); + if (!mapSecondary(Options, CommitBase, CommitSize, AllocPos, 0, + MemMap)) { + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + return nullptr; + } + const uptr HeaderPos = AllocPos - getHeadersSize(); LargeBlock::Header *H = reinterpret_cast( LargeBlock::addHeaderTag(HeaderPos)); if (useMemoryTagging(Options)) storeTags(LargeBlock::addHeaderTag(CommitBase), reinterpret_cast(H + 1)); - H->MapBase = MapBase; - H->MapSize = MapEnd - MapBase; H->CommitBase = CommitBase; H->CommitSize = CommitSize; - H->Data = Data; + H->MemMap = MemMap; if (BlockEndPtr) *BlockEndPtr = CommitBase + CommitSize; { ScopedLock L(Mutex); InUseBlocks.push_back(H); AllocatedBytes += CommitSize; + FragmentedBytes += H->MemMap.getCapacity() - CommitSize; if (LargestSize < CommitSize) LargestSize = CommitSize; NumberOfAllocs++; Stats.add(StatAllocated, CommitSize); - Stats.add(StatMapped, H->MapSize); + Stats.add(StatMapped, H->MemMap.getCapacity()); } return reinterpret_cast(HeaderPos + LargeBlock::getHeaderSize()); } template -void MapAllocator::deallocate(Options Options, void *Ptr) { +void MapAllocator::deallocate(const Options &Options, void *Ptr) + EXCLUDES(Mutex) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); const uptr CommitSize = H->CommitSize; { ScopedLock L(Mutex); InUseBlocks.remove(H); FreedBytes += CommitSize; + FragmentedBytes -= H->MemMap.getCapacity() - CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); - Stats.sub(StatMapped, H->MapSize); + Stats.sub(StatMapped, H->MemMap.getCapacity()); } Cache.store(Options, H); } template -void MapAllocator::getStats(ScopedString *Str) const { - Str->append( - "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times " - "(%zuK), remains %zu (%zuK) max %zuM\n", - NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, FreedBytes >> 10, - NumberOfAllocs - NumberOfFrees, (AllocatedBytes - FreedBytes) >> 10, - LargestSize >> 20); +void MapAllocator::getStats(ScopedString *Str) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times " + "(%zuK), remains %u (%zuK) max %zuM, Fragmented %zuK\n", + NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, + FreedBytes >> 10, NumberOfAllocs - NumberOfFrees, + (AllocatedBytes - FreedBytes) >> 10, LargestSize >> 20, + FragmentedBytes >> 10); + Cache.getStats(Str); } } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/size_class_map.h b/third_party/llvm-scudo-standalone/size_class_map.h index ba0f784..4138885 100644 --- a/third_party/llvm-scudo-standalone/size_class_map.h +++ 
b/third_party/llvm-scudo-standalone/size_class_map.h @@ -23,7 +23,7 @@ inline uptr scaledLog2(uptr Size, uptr ZeroLog, uptr LogBits) { } template struct SizeClassMapBase { - static u32 getMaxCachedHint(uptr Size) { + static u16 getMaxCachedHint(uptr Size) { DCHECK_NE(Size, 0); u32 N; // Force a 32-bit division if the template parameters allow for it. @@ -31,7 +31,10 @@ template struct SizeClassMapBase { N = static_cast((1UL << Config::MaxBytesCachedLog) / Size); else N = (1U << Config::MaxBytesCachedLog) / static_cast(Size); - return Max(1U, Min(Config::MaxNumCachedHint, N)); + + // Note that Config::MaxNumCachedHint is u16 so the result is guaranteed to + // fit in u16. + return static_cast(Max(1U, Min(Config::MaxNumCachedHint, N))); } }; @@ -65,7 +68,7 @@ class FixedSizeClassMap : public SizeClassMapBase { static const uptr M = (1UL << S) - 1; public: - static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; + static const u16 MaxNumCachedHint = Config::MaxNumCachedHint; static const uptr MaxSize = (1UL << Config::MaxSizeLog) + Config::SizeDelta; static const uptr NumClasses = @@ -99,7 +102,7 @@ class FixedSizeClassMap : public SizeClassMapBase { return MidClass + 1 + scaledLog2(Size - 1, Config::MidSizeLog, S); } - static u32 getMaxCachedHint(uptr Size) { + static u16 getMaxCachedHint(uptr Size) { DCHECK_LE(Size, MaxSize); return Base::getMaxCachedHint(Size); } @@ -178,7 +181,7 @@ class TableSizeClassMap : public SizeClassMapBase { static constexpr LSBTable LTable = {}; public: - static const u32 MaxNumCachedHint = Config::MaxNumCachedHint; + static const u16 MaxNumCachedHint = Config::MaxNumCachedHint; static const uptr NumClasses = ClassesSize + 1; static_assert(NumClasses < 256, ""); @@ -212,7 +215,7 @@ class TableSizeClassMap : public SizeClassMapBase { return SzTable.Tab[scaledLog2(Size - 1, Config::MidSizeLog, S)]; } - static u32 getMaxCachedHint(uptr Size) { + static u16 getMaxCachedHint(uptr Size) { DCHECK_LE(Size, MaxSize); return Base::getMaxCachedHint(Size); } @@ -223,7 +226,7 @@ struct DefaultSizeClassConfig { static const uptr MinSizeLog = 5; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = 0; }; @@ -235,7 +238,7 @@ struct FuchsiaSizeClassConfig { static const uptr MinSizeLog = 5; static const uptr MidSizeLog = 8; static const uptr MaxSizeLog = 17; - static const u32 MaxNumCachedHint = 10; + static const u16 MaxNumCachedHint = 12; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = Chunk::getHeaderSize(); }; @@ -248,10 +251,10 @@ struct AndroidSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 6; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 13; + static const u16 MaxNumCachedHint = 13; static const uptr MaxBytesCachedLog = 13; - static constexpr u32 Classes[] = { + static constexpr uptr Classes[] = { 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000b0, 0x000c0, 0x000e0, 0x00120, 0x00160, 0x001c0, 0x00250, 0x00320, 0x00450, 0x00670, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01210, 0x01bd0, 0x02210, @@ -263,10 +266,10 @@ struct AndroidSizeClassConfig { static const uptr MinSizeLog = 4; static const uptr MidSizeLog = 7; static const uptr MaxSizeLog = 16; - static const u32 MaxNumCachedHint = 14; + static const u16 MaxNumCachedHint = 14; static const uptr MaxBytesCachedLog = 13; - static constexpr u32 
Classes[] = { + static constexpr uptr Classes[] = { 0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090, 0x000a0, 0x000b0, 0x000c0, 0x000e0, 0x000f0, 0x00110, 0x00120, 0x00130, 0x00150, 0x00160, 0x00170, 0x00190, 0x001d0, 0x00210, 0x00240, 0x002a0, @@ -286,36 +289,12 @@ typedef TableSizeClassMap AndroidSizeClassMap; static_assert(AndroidSizeClassMap::usesCompressedLSBFormat(), ""); #endif -struct SvelteSizeClassConfig { -#if SCUDO_WORDSIZE == 64U - static const uptr NumBits = 4; - static const uptr MinSizeLog = 4; - static const uptr MidSizeLog = 8; - static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 13; - static const uptr MaxBytesCachedLog = 10; - static const uptr SizeDelta = Chunk::getHeaderSize(); -#else - static const uptr NumBits = 4; - static const uptr MinSizeLog = 3; - static const uptr MidSizeLog = 7; - static const uptr MaxSizeLog = 14; - static const u32 MaxNumCachedHint = 14; - static const uptr MaxBytesCachedLog = 10; - static const uptr SizeDelta = Chunk::getHeaderSize(); -#endif -}; - -typedef FixedSizeClassMap SvelteSizeClassMap; - -// Trusty is configured to only have one region containing blocks of size -// 2^7 bytes. struct TrustySizeClassConfig { static const uptr NumBits = 1; - static const uptr MinSizeLog = 7; - static const uptr MidSizeLog = 7; - static const uptr MaxSizeLog = 7; - static const u32 MaxNumCachedHint = 8; + static const uptr MinSizeLog = 5; + static const uptr MidSizeLog = 5; + static const uptr MaxSizeLog = 15; + static const u16 MaxNumCachedHint = 12; static const uptr MaxBytesCachedLog = 10; static const uptr SizeDelta = 0; }; @@ -335,8 +314,8 @@ template inline void printMap() { const uptr L = S ? getMostSignificantSetBitIndex(S) : 0; const uptr Cached = SCMap::getMaxCachedHint(S) * S; Buffer.append( - "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n", - I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached, + "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %u %zu; id %zu\n", I, + S, D, P, L, SCMap::getMaxCachedHint(S), Cached, SCMap::getClassIdBySize(S)); TotalCached += Cached; PrevS = S; @@ -345,7 +324,7 @@ template inline void printMap() { Buffer.output(); } -template static void validateMap() { +template static UNUSED void validateMap() { for (uptr C = 0; C < SCMap::NumClasses; C++) { if (C == SCMap::BatchClassId) continue; diff --git a/third_party/llvm-scudo-standalone/stack_depot.h b/third_party/llvm-scudo-standalone/stack_depot.h index 458198f..12c35eb 100644 --- a/third_party/llvm-scudo-standalone/stack_depot.h +++ b/third_party/llvm-scudo-standalone/stack_depot.h @@ -62,8 +62,7 @@ class StackDepot { // This is achieved by re-checking the hash of the stack trace before // returning the trace. -#ifdef SCUDO_FUZZ - // Use smaller table sizes for fuzzing in order to reduce input size. 
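The size_class_map.h hunks above narrow getMaxCachedHint() and MaxNumCachedHint from u32 to u16; the arithmetic is unchanged, and the result always fits in 16 bits because it is clamped to MaxNumCachedHint. A self-contained worked example (not part of the patch) using the DefaultSizeClassConfig values shown above, MaxBytesCachedLog = 10 and MaxNumCachedHint = 14:

// Worked example of the cached-block hint: (1 << MaxBytesCachedLog) / Size,
// clamped to [1, MaxNumCachedHint].
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t MaxBytesCachedLog = 10; // 1 KiB cache budget per class.
  const uint16_t MaxNumCachedHint = 14;

  for (uint32_t Size : {32u, 128u, 512u}) {
    const uint32_t N = (1u << MaxBytesCachedLog) / Size; // 32, 8, 2
    const uint16_t Hint = static_cast<uint16_t>(
        std::max(1u, std::min<uint32_t>(MaxNumCachedHint, N)));
    std::printf("Size %u -> hint %u\n", Size, Hint); // prints 14, 8, 2
  }
  return 0;
}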
+#if SCUDO_SMALL_STACK_DEPOT static const uptr TabBits = 4; #else static const uptr TabBits = 16; @@ -72,7 +71,7 @@ class StackDepot { static const uptr TabMask = TabSize - 1; atomic_u32 Tab[TabSize] = {}; -#ifdef SCUDO_FUZZ +#if SCUDO_SMALL_STACK_DEPOT static const uptr RingBits = 4; #else static const uptr RingBits = 19; diff --git a/third_party/llvm-scudo-standalone/stats.h b/third_party/llvm-scudo-standalone/stats.h index be5bf2d..658b758 100644 --- a/third_party/llvm-scudo-standalone/stats.h +++ b/third_party/llvm-scudo-standalone/stats.h @@ -12,6 +12,7 @@ #include "atomic_helpers.h" #include "list.h" #include "mutex.h" +#include "thread_annotations.h" #include @@ -60,19 +61,19 @@ class GlobalStats : public LocalStats { public: void init() { LocalStats::init(); } - void link(LocalStats *S) { + void link(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.push_back(S); } - void unlink(LocalStats *S) { + void unlink(LocalStats *S) EXCLUDES(Mutex) { ScopedLock L(Mutex); StatsList.remove(S); for (uptr I = 0; I < StatCount; I++) add(static_cast(I), S->get(static_cast(I))); } - void get(uptr *S) const { + void get(uptr *S) const EXCLUDES(Mutex) { ScopedLock L(Mutex); for (uptr I = 0; I < StatCount; I++) S[I] = LocalStats::get(static_cast(I)); @@ -85,15 +86,15 @@ class GlobalStats : public LocalStats { S[I] = static_cast(S[I]) >= 0 ? S[I] : 0; } - void lock() { Mutex.lock(); } - void unlock() { Mutex.unlock(); } + void lock() ACQUIRE(Mutex) { Mutex.lock(); } + void unlock() RELEASE(Mutex) { Mutex.unlock(); } - void disable() { lock(); } - void enable() { unlock(); } + void disable() ACQUIRE(Mutex) { lock(); } + void enable() RELEASE(Mutex) { unlock(); } private: mutable HybridMutex Mutex; - DoublyLinkedList StatsList; + DoublyLinkedList StatsList GUARDED_BY(Mutex); }; } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/string_utils.cpp b/third_party/llvm-scudo-standalone/string_utils.cpp index acf8588..d4e4e3b 100644 --- a/third_party/llvm-scudo-standalone/string_utils.cpp +++ b/third_party/llvm-scudo-standalone/string_utils.cpp @@ -195,6 +195,28 @@ static int formatString(char *Buffer, uptr BufferLength, const char *Format, appendChar(&Buffer, BufferEnd, static_cast(va_arg(Args, int))); break; } + // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively. + // However, `-Wformat` doesn't know we have a different parser for those + // placeholders and it keeps complaining the type mismatch on 64-bit + // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to + // silence the warning, we turn to use `PRId64`/`PRIu64` for printing + // `s64`/`u64` and handle the `ld`/`lu` here. + case 'l': { + ++Cur; + RAW_CHECK(*Cur == 'd' || *Cur == 'u'); + + if (*Cur == 'd') { + DVal = va_arg(Args, s64); + Res += + appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); + } else { + UVal = va_arg(Args, u64); + Res += appendUnsigned(&Buffer, BufferEnd, UVal, 10, Width, PadWithZero, + false); + } + + break; + } case '%': { RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); Res += appendChar(&Buffer, BufferEnd, '%'); @@ -218,7 +240,7 @@ int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) 
{ return Res; } -void ScopedString::append(const char *Format, va_list Args) { +void ScopedString::vappend(const char *Format, va_list Args) { va_list ArgsCopy; va_copy(ArgsCopy, Args); // formatString doesn't currently support a null buffer or zero buffer length, @@ -236,20 +258,18 @@ void ScopedString::append(const char *Format, va_list Args) { va_end(ArgsCopy); } -FORMAT(2, 3) void ScopedString::append(const char *Format, ...) { va_list Args; va_start(Args, Format); - append(Format, Args); + vappend(Format, Args); va_end(Args); } -FORMAT(1, 2) void Printf(const char *Format, ...) { va_list Args; va_start(Args, Format); ScopedString Msg; - Msg.append(Format, Args); + Msg.vappend(Format, Args); outputRaw(Msg.data()); va_end(Args); } diff --git a/third_party/llvm-scudo-standalone/string_utils.h b/third_party/llvm-scudo-standalone/string_utils.h index 06d23d4..a4cab52 100644 --- a/third_party/llvm-scudo-standalone/string_utils.h +++ b/third_party/llvm-scudo-standalone/string_utils.h @@ -25,16 +25,18 @@ class ScopedString { String.clear(); String.push_back('\0'); } - void append(const char *Format, va_list Args); - void append(const char *Format, ...); + void vappend(const char *Format, va_list Args); + void append(const char *Format, ...) FORMAT(2, 3); void output() const { outputRaw(String.data()); } + void reserve(size_t Size) { String.reserve(Size + 1); } private: Vector String; }; -int formatString(char *Buffer, uptr BufferLength, const char *Format, ...); -void Printf(const char *Format, ...); +int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) + FORMAT(3, 4); +void Printf(const char *Format, ...) FORMAT(1, 2); } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/tests/CMakeLists.txt b/third_party/llvm-scudo-standalone/tests/CMakeLists.txt index eaa47a0..c6b6a1c 100644 --- a/third_party/llvm-scudo-standalone/tests/CMakeLists.txt +++ b/third_party/llvm-scudo-standalone/tests/CMakeLists.txt @@ -7,6 +7,7 @@ set_target_properties(ScudoUnitTests PROPERTIES set(SCUDO_UNITTEST_CFLAGS ${COMPILER_RT_UNITTEST_CFLAGS} ${COMPILER_RT_GTEST_CFLAGS} + ${SANITIZER_TEST_CXX_CFLAGS} -I${COMPILER_RT_SOURCE_DIR}/include -I${COMPILER_RT_SOURCE_DIR}/lib -I${COMPILER_RT_SOURCE_DIR}/lib/scudo/standalone @@ -14,11 +15,15 @@ set(SCUDO_UNITTEST_CFLAGS -DGTEST_HAS_RTTI=0 -g # Extra flags for the C++ tests + -Wconversion # TODO(kostyak): find a way to make -fsized-deallocation work -Wno-mismatched-new-delete) if(COMPILER_RT_DEBUG) - list(APPEND SCUDO_UNITTEST_CFLAGS -DSCUDO_DEBUG=1) + list(APPEND SCUDO_UNITTEST_CFLAGS -DSCUDO_DEBUG=1 -DSCUDO_ENABLE_HOOKS=1) + if (NOT FUCHSIA) + list(APPEND SCUDO_UNITTEST_CFLAGS -DSCUDO_ENABLE_HOOKS_TESTS=1) + endif() endif() if(ANDROID) @@ -30,18 +35,21 @@ if (COMPILER_RT_HAS_GWP_ASAN) -mno-omit-leaf-frame-pointer) endif() +append_list_if(COMPILER_RT_HAS_WTHREAD_SAFETY_FLAG -Werror=thread-safety + SCUDO_UNITTEST_CFLAGS) + set(SCUDO_TEST_ARCH ${SCUDO_STANDALONE_SUPPORTED_ARCH}) # gtests requires c++ -set(LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS}) -foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES}) - list(APPEND LINK_FLAGS -l${lib}) -endforeach() -list(APPEND LINK_FLAGS -pthread) +set(SCUDO_UNITTEST_LINK_FLAGS + ${COMPILER_RT_UNITTEST_LINK_FLAGS} + ${COMPILER_RT_UNWINDER_LINK_LIBS} + ${SANITIZER_TEST_CXX_LIBRARIES}) +list(APPEND SCUDO_UNITTEST_LINK_FLAGS -pthread -no-pie) # Linking against libatomic is required with some compilers check_library_exists(atomic __atomic_load_8 "" COMPILER_RT_HAS_LIBATOMIC) if (COMPILER_RT_HAS_LIBATOMIC) - 
list(APPEND LINK_FLAGS -latomic) + list(APPEND SCUDO_UNITTEST_LINK_FLAGS -latomic) endif() set(SCUDO_TEST_HEADERS @@ -73,10 +81,10 @@ macro(add_scudo_unittest testname) "${testname}-${arch}-Test" ${arch} SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE} COMPILE_DEPS ${SCUDO_TEST_HEADERS} - DEPS gtest scudo_standalone + DEPS llvm_gtest scudo_standalone RUNTIME ${RUNTIME} CFLAGS ${SCUDO_UNITTEST_CFLAGS} - LINK_FLAGS ${LINK_FLAGS}) + LINK_FLAGS ${SCUDO_UNITTEST_LINK_FLAGS}) endforeach() endif() endmacro() @@ -88,6 +96,7 @@ set(SCUDO_UNIT_TEST_SOURCES chunk_test.cpp combined_test.cpp common_test.cpp + condition_variable_test.cpp flags_test.cpp list_test.cpp map_test.cpp @@ -101,11 +110,18 @@ set(SCUDO_UNIT_TEST_SOURCES size_class_map_test.cpp stats_test.cpp strings_test.cpp + timing_test.cpp tsd_test.cpp vector_test.cpp scudo_unit_test_main.cpp ) +# Temporary hack until LLVM libc supports inttypes.h print format macros +# See: https://github.com/llvm/llvm-project/issues/63317#issuecomment-1591906241 +if(LLVM_LIBC_INCLUDE_SCUDO) + list(REMOVE_ITEM SCUDO_UNIT_TEST_SOURCES timing_test.cpp) +endif() + add_scudo_unittest(ScudoUnitTest SOURCES ${SCUDO_UNIT_TEST_SOURCES}) diff --git a/third_party/llvm-scudo-standalone/tests/checksum_test.cpp b/third_party/llvm-scudo-standalone/tests/checksum_test.cpp index 781f990..c5d5b73 100644 --- a/third_party/llvm-scudo-standalone/tests/checksum_test.cpp +++ b/third_party/llvm-scudo-standalone/tests/checksum_test.cpp @@ -12,16 +12,16 @@ #include -scudo::u16 computeSoftwareChecksum(scudo::u32 Seed, scudo::uptr *Array, - scudo::uptr ArraySize) { +static scudo::u16 computeSoftwareChecksum(scudo::u32 Seed, scudo::uptr *Array, + scudo::uptr ArraySize) { scudo::u16 Checksum = static_cast(Seed & 0xffff); for (scudo::uptr I = 0; I < ArraySize; I++) Checksum = scudo::computeBSDChecksum(Checksum, Array[I]); return Checksum; } -scudo::u16 computeHardwareChecksum(scudo::u32 Seed, scudo::uptr *Array, - scudo::uptr ArraySize) { +static scudo::u16 computeHardwareChecksum(scudo::u32 Seed, scudo::uptr *Array, + scudo::uptr ArraySize) { scudo::u32 Crc = Seed; for (scudo::uptr I = 0; I < ArraySize; I++) Crc = scudo::computeHardwareCRC32(Crc, Array[I]); @@ -32,7 +32,7 @@ typedef scudo::u16 (*ComputeChecksum)(scudo::u32, scudo::uptr *, scudo::uptr); // This verifies that flipping bits in the data being checksummed produces a // different checksum. We do not use random data to avoid flakyness. 
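A few lines up, the string_utils.{h,cpp} hunks move the FORMAT(...) annotations from the definitions onto the declarations, which is what lets -Wformat check callers in other translation units; the indices shift by one for member functions such as ScopedString::append because of the implicit this parameter, hence FORMAT(2, 3) there. Below is a minimal sketch of the mechanism, assuming FORMAT wraps the standard GCC/Clang format(printf, ...) attribute; the sketch defines its own macro rather than relying on scudo's headers.

#include <cstdarg>
#include <cstdio>

// Hypothetical local definition for this sketch only.
#define FORMAT(F, A) __attribute__((format(printf, F, A)))

// The attribute lives on the declaration, so every call site is checked.
void logf(const char *Format, ...) FORMAT(1, 2);

void logf(const char *Format, ...) {
  va_list Args;
  va_start(Args, Format);
  std::vfprintf(stderr, Format, Args);
  va_end(Args);
}

int main() {
  logf("%d blocks\n", 42);    // OK.
  // logf("%s blocks\n", 42); // Would be rejected under -Werror=format.
  return 0;
}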
-template void verifyChecksumFunctionBitFlip() { +template static void verifyChecksumFunctionBitFlip() { scudo::uptr Array[sizeof(scudo::u64) / sizeof(scudo::uptr)]; const scudo::uptr ArraySize = ARRAY_SIZE(Array); memset(Array, 0xaa, sizeof(Array)); diff --git a/third_party/llvm-scudo-standalone/tests/chunk_test.cpp b/third_party/llvm-scudo-standalone/tests/chunk_test.cpp index 7a29f3c..1b2c1eb 100644 --- a/third_party/llvm-scudo-standalone/tests/chunk_test.cpp +++ b/third_party/llvm-scudo-standalone/tests/chunk_test.cpp @@ -37,29 +37,6 @@ TEST(ScudoChunkDeathTest, ChunkBasic) { free(Block); } -TEST(ScudoChunkTest, ChunkCmpXchg) { - initChecksum(); - const scudo::uptr Size = 0x100U; - scudo::Chunk::UnpackedHeader OldHeader = {}; - OldHeader.OriginOrWasZeroed = scudo::Chunk::Origin::Malloc; - OldHeader.ClassId = 0x42U; - OldHeader.SizeOrUnusedBytes = Size; - OldHeader.State = scudo::Chunk::State::Allocated; - void *Block = malloc(HeaderSize + Size); - void *P = reinterpret_cast(reinterpret_cast(Block) + - HeaderSize); - scudo::Chunk::storeHeader(Cookie, P, &OldHeader); - memset(P, 'A', Size); - scudo::Chunk::UnpackedHeader NewHeader = OldHeader; - NewHeader.State = scudo::Chunk::State::Quarantined; - scudo::Chunk::compareExchangeHeader(Cookie, P, &NewHeader, &OldHeader); - NewHeader = {}; - EXPECT_TRUE(scudo::Chunk::isValid(Cookie, P, &NewHeader)); - EXPECT_EQ(NewHeader.State, scudo::Chunk::State::Quarantined); - EXPECT_FALSE(scudo::Chunk::isValid(InvalidCookie, P, &NewHeader)); - free(Block); -} - TEST(ScudoChunkDeathTest, CorruptHeader) { initChecksum(); const scudo::uptr Size = 0x100U; diff --git a/third_party/llvm-scudo-standalone/tests/combined_test.cpp b/third_party/llvm-scudo-standalone/tests/combined_test.cpp index a2461c5..3dbd93c 100644 --- a/third_party/llvm-scudo-standalone/tests/combined_test.cpp +++ b/third_party/llvm-scudo-standalone/tests/combined_test.cpp @@ -10,8 +10,13 @@ #include "tests/scudo_unit_test.h" #include "allocator_config.h" +#include "chunk.h" #include "combined.h" +#include "condition_variable.h" +#include "mem_map.h" +#include "size_class_map.h" +#include #include #include #include @@ -38,7 +43,7 @@ bool isPrimaryAllocation(scudo::uptr Size, scudo::uptr Alignment) { if (Alignment < MinAlignment) Alignment = MinAlignment; const scudo::uptr NeededSize = - scudo::roundUpTo(Size, MinAlignment) + + scudo::roundUp(Size, MinAlignment) + ((Alignment > MinAlignment) ? 
Alignment : scudo::Chunk::getHeaderSize()); return AllocatorT::PrimaryT::canAllocate(NeededSize); } @@ -47,12 +52,12 @@ template void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, scudo::uptr Alignment) { const scudo::uptr MinAlignment = 1UL << SCUDO_MIN_ALIGNMENT_LOG; - Size = scudo::roundUpTo(Size, MinAlignment); + Size = scudo::roundUp(Size, MinAlignment); if (Allocator->useMemoryTaggingTestOnly()) EXPECT_DEATH( { disableDebuggerdMaybe(); - reinterpret_cast(P)[-1] = 0xaa; + reinterpret_cast(P)[-1] = 'A'; }, ""); if (isPrimaryAllocation(Size, Alignment) @@ -61,7 +66,7 @@ void checkMemoryTaggingMaybe(AllocatorT *Allocator, void *P, scudo::uptr Size, EXPECT_DEATH( { disableDebuggerdMaybe(); - reinterpret_cast(P)[Size] = 0xaa; + reinterpret_cast(P)[Size] = 'A'; }, ""); } @@ -76,14 +81,70 @@ template struct TestAllocator : scudo::Allocator { } ~TestAllocator() { this->unmapTestOnly(); } - void *operator new(size_t size) { + void *operator new(size_t size); + void operator delete(void *ptr); +}; + +constexpr size_t kMaxAlign = std::max({ + alignof(scudo::Allocator), +#if SCUDO_CAN_USE_PRIMARY64 + alignof(scudo::Allocator), +#endif + alignof(scudo::Allocator) +}); + +#if SCUDO_RISCV64 +// The allocator is over 4MB large. Rather than creating an instance of this on +// the heap, keep it in a global storage to reduce fragmentation from having to +// mmap this at the start of every test. +struct TestAllocatorStorage { + static constexpr size_t kMaxSize = std::max({ + sizeof(scudo::Allocator), +#if SCUDO_CAN_USE_PRIMARY64 + sizeof(scudo::Allocator), +#endif + sizeof(scudo::Allocator) + }); + + // To alleviate some problem, let's skip the thread safety analysis here. + static void *get(size_t size) NO_THREAD_SAFETY_ANALYSIS { + CHECK(size <= kMaxSize && + "Allocation size doesn't fit in the allocator storage"); + M.lock(); + return AllocatorStorage; + } + + static void release(void *ptr) NO_THREAD_SAFETY_ANALYSIS { + M.assertHeld(); + M.unlock(); + ASSERT_EQ(ptr, AllocatorStorage); + } + + static scudo::HybridMutex M; + static uint8_t AllocatorStorage[kMaxSize]; +}; +scudo::HybridMutex TestAllocatorStorage::M; +alignas(kMaxAlign) uint8_t TestAllocatorStorage::AllocatorStorage[kMaxSize]; +#else +struct TestAllocatorStorage { + static void *get(size_t size) NO_THREAD_SAFETY_ANALYSIS { void *p = nullptr; - EXPECT_EQ(0, posix_memalign(&p, alignof(TestAllocator), size)); + EXPECT_EQ(0, posix_memalign(&p, kMaxAlign, size)); return p; } - - void operator delete(void *ptr) { free(ptr); } + static void release(void *ptr) NO_THREAD_SAFETY_ANALYSIS { free(ptr); } }; +#endif + +template +void *TestAllocator::operator new(size_t size) { + return TestAllocatorStorage::get(size); +} + +template +void TestAllocator::operator delete(void *ptr) { + TestAllocatorStorage::release(ptr); +} template struct ScudoCombinedTest : public Test { ScudoCombinedTest() { @@ -91,7 +152,7 @@ template struct ScudoCombinedTest : public Test { Allocator = std::make_unique(); } ~ScudoCombinedTest() { - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); UseQuarantine = true; } @@ -105,20 +166,65 @@ template struct ScudoCombinedTest : public Test { template using ScudoCombinedDeathTest = ScudoCombinedTest; +namespace scudo { +struct TestConditionVariableConfig { + static const bool MaySupportMemoryTagging = true; + template + using TSDRegistryT = + scudo::TSDRegistrySharedT; // Shared, max 8 TSDs. 
+ + struct Primary { + using SizeClassMap = scudo::AndroidSizeClassMap; +#if SCUDO_CAN_USE_PRIMARY64 + static const scudo::uptr RegionSizeLog = 28U; + typedef scudo::u32 CompactPtrT; + static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG; + static const scudo::uptr GroupSizeLog = 20U; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; +#else + static const scudo::uptr RegionSizeLog = 18U; + static const scudo::uptr GroupSizeLog = 18U; + typedef scudo::uptr CompactPtrT; +#endif + static const scudo::s32 MinReleaseToOsIntervalMs = 1000; + static const scudo::s32 MaxReleaseToOsIntervalMs = 1000; + static const bool UseConditionVariable = true; +#if SCUDO_LINUX + using ConditionVariableT = scudo::ConditionVariableLinux; +#else + using ConditionVariableT = scudo::ConditionVariableDummy; +#endif + }; +#if SCUDO_CAN_USE_PRIMARY64 + template + using PrimaryT = scudo::SizeClassAllocator64; +#else + template + using PrimaryT = scudo::SizeClassAllocator32; +#endif + + struct Secondary { + template + using CacheT = scudo::MapAllocatorNoCache; + }; + template using SecondaryT = scudo::MapAllocator; +}; +} // namespace scudo + #if SCUDO_FUCHSIA #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, FuchsiaConfig) #else #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig) \ SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, DefaultConfig) \ - SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig) + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig) \ + SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConditionVariableConfig) #endif #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE) \ using FIXTURE##NAME##_##TYPE = FIXTURE##NAME; \ - TEST_F(FIXTURE##NAME##_##TYPE, NAME) { Run(); } + TEST_F(FIXTURE##NAME##_##TYPE, NAME) { FIXTURE##NAME::Run(); } #define SCUDO_TYPED_TEST(FIXTURE, NAME) \ template \ @@ -152,9 +258,10 @@ void ScudoCombinedTest::BasicTest(scudo::uptr SizeLog) { for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) { const scudo::uptr Align = 1U << AlignLog; for (scudo::sptr Delta = -32; Delta <= 32; Delta++) { - if (static_cast(1U << SizeLog) + Delta <= 0) + if ((1LL << SizeLog) + Delta < 0) continue; - const scudo::uptr Size = (1U << SizeLog) + Delta; + const scudo::uptr Size = + static_cast((1LL << SizeLog) + Delta); void *P = Allocator->allocate(Size, Origin, Align); EXPECT_NE(P, nullptr); EXPECT_TRUE(Allocator->isOwned(P)); @@ -165,6 +272,9 @@ void ScudoCombinedTest::BasicTest(scudo::uptr SizeLog) { Allocator->deallocate(P, Origin, Size); } } + + Allocator->printStats(); + Allocator->printFragmentationInfo(); } #define SCUDO_MAKE_BASIC_TEST(SizeLog) \ @@ -204,7 +314,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroContents) { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - ASSERT_EQ((reinterpret_cast(P))[I], 0); + ASSERT_EQ((reinterpret_cast(P))[I], '\0'); memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } @@ -222,7 +332,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroFill) { void *P = Allocator->allocate(Size, Origin, 1U << MinAlignLog, false); EXPECT_NE(P, nullptr); for (scudo::uptr I = 0; I < Size; I++) - ASSERT_EQ((reinterpret_cast(P))[I], 0); + ASSERT_EQ((reinterpret_cast(P))[I], '\0'); memset(P, 0xaa, Size); Allocator->deallocate(P, Origin, Size); } @@ -281,7 +391,7 @@ 
SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeIncreasing) { // we preserve the data in the process. scudo::uptr Size = 16; void *P = Allocator->allocate(Size, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, Size); while (Size < TypeParam::Primary::SizeClassMap::MaxSize * 4) { void *NewP = Allocator->reallocate(P, Size * 2); @@ -303,7 +413,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeDecreasing) { scudo::uptr Size = TypeParam::Primary::SizeClassMap::MaxSize * 2; const scudo::uptr DataSize = 2048U; void *P = Allocator->allocate(Size, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, scudo::Min(Size, DataSize)); while (Size > 1U) { Size /= 2U; @@ -326,10 +436,11 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, ReallocateSame) { constexpr scudo::uptr ReallocSize = TypeParam::Primary::SizeClassMap::MaxSize - 64; void *P = Allocator->allocate(ReallocSize, Origin); - const char Marker = 0xab; + const char Marker = 'A'; memset(P, Marker, ReallocSize); for (scudo::sptr Delta = -32; Delta < 32; Delta += 8) { - const scudo::uptr NewSize = ReallocSize + Delta; + const scudo::uptr NewSize = + static_cast(static_cast(ReallocSize) + Delta); void *NewP = Allocator->reallocate(P, NewSize); EXPECT_EQ(NewP, P); for (scudo::uptr I = 0; I < ReallocSize - 32; I++) @@ -351,11 +462,13 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, IterateOverChunks) { std::vector V; for (scudo::uptr I = 0; I < 64U; I++) V.push_back(Allocator->allocate( - rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + static_cast(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); Allocator->disable(); Allocator->iterateOverChunks( 0U, static_cast(SCUDO_MMAP_RANGE_SIZE - 1), - [](uintptr_t Base, size_t Size, void *Arg) { + [](uintptr_t Base, UNUSED size_t Size, void *Arg) { std::vector *V = reinterpret_cast *>(Arg); void *P = reinterpret_cast(Base); EXPECT_NE(std::find(V->begin(), V->end(), P), V->end()); @@ -380,7 +493,7 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) { disableDebuggerdMaybe(); void *P = Allocator->allocate(Size, Origin); Allocator->deallocate(P, Origin); - reinterpret_cast(P)[0] = 0xaa; + reinterpret_cast(P)[0] = 'A'; }, ""); EXPECT_DEATH( @@ -388,7 +501,7 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) { disableDebuggerdMaybe(); void *P = Allocator->allocate(Size, Origin); Allocator->deallocate(P, Origin); - reinterpret_cast(P)[Size - 1] = 0xaa; + reinterpret_cast(P)[Size - 1] = 'A'; }, ""); } @@ -400,18 +513,18 @@ SCUDO_TYPED_TEST(ScudoCombinedDeathTest, DisableMemoryTagging) { if (Allocator->useMemoryTaggingTestOnly()) { // Check that disabling memory tagging works correctly. 
void *P = Allocator->allocate(2048, Origin); - EXPECT_DEATH(reinterpret_cast(P)[2048] = 0xaa, ""); + EXPECT_DEATH(reinterpret_cast(P)[2048] = 'A', ""); scudo::ScopedDisableMemoryTagChecks NoTagChecks; Allocator->disableMemoryTagging(); - reinterpret_cast(P)[2048] = 0xaa; + reinterpret_cast(P)[2048] = 'A'; Allocator->deallocate(P, Origin); P = Allocator->allocate(2048, Origin); EXPECT_EQ(scudo::untagPointer(P), P); - reinterpret_cast(P)[2048] = 0xaa; + reinterpret_cast(P)[2048] = 'A'; Allocator->deallocate(P, Origin); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } } @@ -434,21 +547,49 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) { EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos); } -SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) { +SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS { + auto *Allocator = this->Allocator.get(); + + std::vector V; + for (scudo::uptr I = 0; I < 64U; I++) + V.push_back(Allocator->allocate( + static_cast(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); + for (auto P : V) + Allocator->deallocate(P, Origin); + + bool UnlockRequired; + auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + EXPECT_TRUE(!TSD->getCache().isEmpty()); + TSD->getCache().drain(); + EXPECT_TRUE(TSD->getCache().isEmpty()); + if (UnlockRequired) + TSD->unlock(); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS { auto *Allocator = this->Allocator.get(); std::vector V; for (scudo::uptr I = 0; I < 64U; I++) V.push_back(Allocator->allocate( - rand() % (TypeParam::Primary::SizeClassMap::MaxSize / 2U), Origin)); + static_cast(std::rand()) % + (TypeParam::Primary::SizeClassMap::MaxSize / 2U), + Origin)); for (auto P : V) Allocator->deallocate(P, Origin); + // `ForceAll` will also drain the caches. + Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll); + bool UnlockRequired; auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); - EXPECT_TRUE(!TSD->Cache.isEmpty()); - TSD->Cache.drain(); - EXPECT_TRUE(TSD->Cache.isEmpty()); + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + EXPECT_TRUE(TSD->getCache().isEmpty()); + EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U); + EXPECT_TRUE(Allocator->getQuarantine()->isEmpty()); if (UnlockRequired) TSD->unlock(); } @@ -468,12 +609,16 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { } std::vector> V; for (scudo::uptr I = 0; I < 256U; I++) { - const scudo::uptr Size = std::rand() % 4096U; + const scudo::uptr Size = static_cast(std::rand()) % 4096U; void *P = Allocator->allocate(Size, Origin); // A region could have ran out of memory, resulting in a null P. if (P) V.push_back(std::make_pair(P, Size)); } + + // Try to interleave pushBlocks(), popBatch() and releaseToOS(). + Allocator->releaseToOS(scudo::ReleaseToOS::Force); + while (!V.empty()) { auto Pair = V.back(); Allocator->deallocate(Pair.first, Origin, Pair.second); @@ -487,18 +632,19 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) { } for (auto &T : Threads) T.join(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } // Test that multiple instantiations of the allocator have not messed up the // process's signal handlers (GWP-ASan used to do this). 
TEST(ScudoCombinedDeathTest, SKIP_ON_FUCHSIA(testSEGV)) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data); - EXPECT_NE(P, nullptr); + scudo::ReservedMemoryT ReservedMemory; + ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, "testSEGV")); + void *P = reinterpret_cast(ReservedMemory.getBase()); + ASSERT_NE(P, nullptr); EXPECT_DEATH(memset(P, 0xaa, Size), ""); - scudo::unmap(P, Size, UNMAP_ALL, &Data); + ReservedMemory.release(); } struct DeathSizeClassConfig { @@ -506,28 +652,37 @@ struct DeathSizeClassConfig { static const scudo::uptr MinSizeLog = 10; static const scudo::uptr MidSizeLog = 10; static const scudo::uptr MaxSizeLog = 13; - static const scudo::u32 MaxNumCachedHint = 4; + static const scudo::u16 MaxNumCachedHint = 8; static const scudo::uptr MaxBytesCachedLog = 12; static const scudo::uptr SizeDelta = 0; }; -static const scudo::uptr DeathRegionSizeLog = 20U; +static const scudo::uptr DeathRegionSizeLog = 21U; struct DeathConfig { static const bool MaySupportMemoryTagging = false; - - // Tiny allocator, its Primary only serves chunks of four sizes. - using SizeClassMap = scudo::FixedSizeClassMap; - typedef scudo::SizeClassAllocator64 Primary; - static const scudo::uptr PrimaryRegionSizeLog = DeathRegionSizeLog; - static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN; - static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX; - typedef scudo::uptr PrimaryCompactPtrT; - static const scudo::uptr PrimaryCompactPtrScale = 0; - static const bool PrimaryEnableRandomOffset = true; - static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18; - - typedef scudo::MapAllocatorNoCache SecondaryCache; template using TSDRegistryT = scudo::TSDRegistrySharedT; + + struct Primary { + // Tiny allocator, its Primary only serves chunks of four sizes. + using SizeClassMap = scudo::FixedSizeClassMap; + static const scudo::uptr RegionSizeLog = DeathRegionSizeLog; + static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN; + static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; + typedef scudo::uptr CompactPtrT; + static const scudo::uptr CompactPtrScale = 0; + static const bool EnableRandomOffset = true; + static const scudo::uptr MapSizeIncrement = 1UL << 18; + static const scudo::uptr GroupSizeLog = 18; + }; + template + using PrimaryT = scudo::SizeClassAllocator64; + + struct Secondary { + template + using CacheT = scudo::MapAllocatorNoCache; + }; + + template using SecondaryT = scudo::MapAllocator; }; TEST(ScudoCombinedDeathTest, DeathCombined) { @@ -572,13 +727,14 @@ TEST(ScudoCombinedTest, FullRegion) { std::vector V; scudo::uptr FailedAllocationsCount = 0; for (scudo::uptr ClassId = 1U; - ClassId <= DeathConfig::SizeClassMap::LargestClassId; ClassId++) { + ClassId <= DeathConfig::Primary::SizeClassMap::LargestClassId; + ClassId++) { const scudo::uptr Size = - DeathConfig::SizeClassMap::getSizeByClassId(ClassId); + DeathConfig::Primary::SizeClassMap::getSizeByClassId(ClassId); // Allocate enough to fill all of the regions above this one. const scudo::uptr MaxNumberOfChunks = ((1U << DeathRegionSizeLog) / Size) * - (DeathConfig::SizeClassMap::LargestClassId - ClassId + 1); + (DeathConfig::Primary::SizeClassMap::LargestClassId - ClassId + 1); void *P; for (scudo::uptr I = 0; I <= MaxNumberOfChunks; I++) { P = Allocator->allocate(Size - 64U, Origin); @@ -599,11 +755,12 @@ TEST(ScudoCombinedTest, FullRegion) { // operation without issue. 
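DeathConfig above illustrates the allocator-config reshaping that runs through this whole update: the flat, prefix-named members (PrimaryRegionSizeLog, SecondaryCache, and so on) become nested Primary and Secondary structs plus PrimaryT/SecondaryT template aliases, and TestConditionVariableConfig earlier in this file follows the same shape. The reduced skeleton below restates that layout using only the values present in the hunk above; the template arguments are an editorial reconstruction and should be checked against upstream llvmorg-18.1.8 before being reused.

// Reduced skeleton of the nested config layout (values from DeathConfig).
struct SketchConfig {
  static const bool MaySupportMemoryTagging = false;

  struct Primary {
    using SizeClassMap = scudo::FixedSizeClassMap<DeathSizeClassConfig>;
    static const scudo::uptr RegionSizeLog = DeathRegionSizeLog; // 21U here.
    static const scudo::uptr GroupSizeLog = 18;
    // ...compact-pointer and release-to-OS interval knobs as in the hunk above.
  };
  template <typename Config>
  using PrimaryT = scudo::SizeClassAllocator64<Config>;

  struct Secondary {
    template <typename Config>
    using CacheT = scudo::MapAllocatorNoCache<Config>;
  };
  template <typename Config>
  using SecondaryT = scudo::MapAllocator<Config>;
};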
SCUDO_TYPED_TEST(ScudoCombinedTest, ReleaseToOS) { auto *Allocator = this->Allocator.get(); - Allocator->releaseToOS(); + Allocator->releaseToOS(scudo::ReleaseToOS::Force); } SCUDO_TYPED_TEST(ScudoCombinedTest, OddEven) { auto *Allocator = this->Allocator.get(); + Allocator->setOption(scudo::Option::MemtagTuning, M_MEMTAG_TUNING_BUFFER_OVERFLOW); if (!Allocator->useMemoryTaggingTestOnly()) return; @@ -643,7 +800,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, OddEven) { SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemInit) { auto *Allocator = this->Allocator.get(); - std::vector Ptrs(65536, nullptr); + std::vector Ptrs(65536); Allocator->setOption(scudo::Option::ThreadDisableMemInit, 1); @@ -673,9 +830,74 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemInit) { for (unsigned I = 0; I != Ptrs.size(); ++I) { Ptrs[I] = Allocator->allocate(Size, Origin, 1U << MinAlignLog, true); for (scudo::uptr J = 0; J < Size; ++J) - ASSERT_EQ((reinterpret_cast(Ptrs[I]))[J], 0); + ASSERT_EQ((reinterpret_cast(Ptrs[I]))[J], '\0'); } } Allocator->setOption(scudo::Option::ThreadDisableMemInit, 0); } + +SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateInPlaceStress) { + auto *Allocator = this->Allocator.get(); + + // Regression test: make realloc-in-place happen at the very right end of a + // mapped region. + constexpr size_t nPtrs = 10000; + for (scudo::uptr i = 1; i < 32; ++i) { + scudo::uptr Size = 16 * i - 1; + std::vector Ptrs; + for (size_t i = 0; i < nPtrs; ++i) { + void *P = Allocator->allocate(Size, Origin); + P = Allocator->reallocate(P, Size + 1); + Ptrs.push_back(P); + } + + for (size_t i = 0; i < nPtrs; ++i) + Allocator->deallocate(Ptrs[i], Origin); + } +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferSize) { + auto *Allocator = this->Allocator.get(); + auto Size = Allocator->getRingBufferSize(); + if (Size > 0) + EXPECT_EQ(Allocator->getRingBufferAddress()[Size - 1], '\0'); +} + +SCUDO_TYPED_TEST(ScudoCombinedTest, RingBufferAddress) { + auto *Allocator = this->Allocator.get(); + auto *Addr = Allocator->getRingBufferAddress(); + EXPECT_NE(Addr, nullptr); + EXPECT_EQ(Addr, Allocator->getRingBufferAddress()); +} + +#if SCUDO_CAN_USE_PRIMARY64 +#if SCUDO_TRUSTY + +// TrustyConfig is designed for a domain-specific allocator. Add a basic test +// which covers only simple operations and ensure the configuration is able to +// compile. 
+TEST(ScudoCombinedTest, BasicTrustyConfig) { + using AllocatorT = scudo::Allocator; + auto Allocator = std::unique_ptr(new AllocatorT()); + + for (scudo::uptr ClassId = 1U; + ClassId <= scudo::TrustyConfig::SizeClassMap::LargestClassId; + ClassId++) { + const scudo::uptr Size = + scudo::TrustyConfig::SizeClassMap::getSizeByClassId(ClassId); + void *p = Allocator->allocate(Size - scudo::Chunk::getHeaderSize(), Origin); + ASSERT_NE(p, nullptr); + free(p); + } + + bool UnlockRequired; + auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired); + TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); + TSD->getCache().drain(); + + Allocator->releaseToOS(scudo::ReleaseToOS::Force); +} + +#endif +#endif diff --git a/third_party/llvm-scudo-standalone/tests/common_test.cpp b/third_party/llvm-scudo-standalone/tests/common_test.cpp index 711e3b2..fff7c66 100644 --- a/third_party/llvm-scudo-standalone/tests/common_test.cpp +++ b/third_party/llvm-scudo-standalone/tests/common_test.cpp @@ -10,6 +10,7 @@ #include "tests/scudo_unit_test.h" #include "common.h" +#include "mem_map.h" #include #include @@ -34,39 +35,41 @@ TEST(ScudoCommonTest, SKIP_ON_FUCHSIA(ResidentMemorySize)) { const uptr Size = 1ull << 30; const uptr Threshold = Size >> 3; - MapPlatformData Data = {}; - void *P = map(nullptr, Size, "ResidentMemorySize", 0, &Data); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "ResidentMemorySize")); + ASSERT_NE(MemMap.getBase(), 0U); + void *P = reinterpret_cast(MemMap.getBase()); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_LT(getResidentMemorySize(), OnStart + Threshold); memset(P, 1, Size); EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } TEST(ScudoCommonTest, Zeros) { const uptr Size = 1ull << 20; - MapPlatformData Data = {}; - uptr *P = reinterpret_cast(map(nullptr, Size, "Zeros", 0, &Data)); - const ptrdiff_t N = Size / sizeof(*P); - ASSERT_NE(nullptr, P); + MemMapT MemMap; + ASSERT_TRUE(MemMap.map(/*Addr=*/0U, Size, "Zeros")); + ASSERT_NE(MemMap.getBase(), 0U); + uptr *P = reinterpret_cast(MemMap.getBase()); + const ptrdiff_t N = Size / sizeof(uptr); EXPECT_EQ(std::count(P, P + N, 0), N); memset(P, 1, Size); EXPECT_EQ(std::count(P, P + N, 0), 0); - releasePagesToOS((uptr)P, 0, Size, &Data); + MemMap.releasePagesToOS(MemMap.getBase(), Size); EXPECT_EQ(std::count(P, P + N, 0), N); - unmap(P, Size, 0, &Data); + MemMap.unmap(MemMap.getBase(), Size); } } // namespace scudo diff --git a/third_party/llvm-scudo-standalone/tests/condition_variable_test.cpp b/third_party/llvm-scudo-standalone/tests/condition_variable_test.cpp new file mode 100644 index 0000000..caba1f6 --- /dev/null +++ b/third_party/llvm-scudo-standalone/tests/condition_variable_test.cpp @@ -0,0 +1,59 @@ +//===-- condition_variable_test.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "tests/scudo_unit_test.h"
+
+#include "common.h"
+#include "condition_variable.h"
+#include "mutex.h"
+
+#include <thread>
+
+template <typename ConditionVariableT> void simpleWaitAndNotifyAll() {
+  constexpr scudo::u32 NumThreads = 2;
+  constexpr scudo::u32 CounterMax = 1024;
+  std::thread Threads[NumThreads];
+
+  scudo::HybridMutex M;
+  ConditionVariableT CV;
+  CV.bindTestOnly(M);
+  scudo::u32 Counter = 0;
+
+  for (scudo::u32 I = 0; I < NumThreads; ++I) {
+    Threads[I] = std::thread(
+        [&](scudo::u32 Id) {
+          do {
+            scudo::ScopedLock L(M);
+            if (Counter % NumThreads != Id && Counter < CounterMax)
+              CV.wait(M);
+            if (Counter >= CounterMax) {
+              break;
+            } else {
+              ++Counter;
+              CV.notifyAll(M);
+            }
+          } while (true);
+        },
+        I);
+  }
+
+  for (std::thread &T : Threads)
+    T.join();
+
+  EXPECT_EQ(Counter, CounterMax);
+}
+
+TEST(ScudoConditionVariableTest, DummyCVWaitAndNotifyAll) {
+  simpleWaitAndNotifyAll<scudo::ConditionVariableDummy>();
+}
+
+#ifdef SCUDO_LINUX
+TEST(ScudoConditionVariableTest, LinuxCVWaitAndNotifyAll) {
+  simpleWaitAndNotifyAll<scudo::ConditionVariableLinux>();
+}
+#endif
diff --git a/third_party/llvm-scudo-standalone/tests/list_test.cpp b/third_party/llvm-scudo-standalone/tests/list_test.cpp
index 8e13991..140ca02 100644
--- a/third_party/llvm-scudo-standalone/tests/list_test.cpp
+++ b/third_party/llvm-scudo-standalone/tests/list_test.cpp
@@ -161,6 +165,10 @@ TEST(ScudoListTest, SinglyLinkedList) {
   setList(&L1, X);
   checkList(&L1, X);
 
+  setList(&L1, X, Y);
+  L1.insert(X, Z);
+  checkList(&L1, X, Z, Y);
+
   setList(&L1, X, Y, Z);
   setList(&L2, A, B, C);
   L1.append_back(&L2);
diff --git a/third_party/llvm-scudo-standalone/tests/map_test.cpp b/third_party/llvm-scudo-standalone/tests/map_test.cpp
index 095e1b6..06a56f8 100644
--- a/third_party/llvm-scudo-standalone/tests/map_test.cpp
+++ b/third_party/llvm-scudo-standalone/tests/map_test.cpp
@@ -9,6 +9,7 @@
 #include "tests/scudo_unit_test.h"
 
 #include "common.h"
+#include "mem_map.h"
 #include <string.h>
 #include <unistd.h>
@@ -17,16 +18,20 @@ static const char *MappingName = "scudo:test";
 
 TEST(ScudoMapTest, PageSize) {
   EXPECT_EQ(scudo::getPageSizeCached(),
-            static_cast<scudo::uptr>(getpagesize()));
+            static_cast<scudo::uptr>(sysconf(_SC_PAGESIZE)));
 }
 
 TEST(ScudoMapDeathTest, MapNoAccessUnmap) {
   const scudo::uptr Size = 4 * scudo::getPageSizeCached();
-  scudo::MapPlatformData Data = {};
-  void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data);
-  EXPECT_NE(P, nullptr);
-  EXPECT_DEATH(memset(P, 0xaa, Size), "");
-  scudo::unmap(P, Size, UNMAP_ALL, &Data);
+  scudo::ReservedMemoryT ReservedMemory;
+
+  ASSERT_TRUE(ReservedMemory.create(/*Addr=*/0U, Size, MappingName));
+  EXPECT_NE(ReservedMemory.getBase(), 0U);
+  EXPECT_DEATH(
+      memset(reinterpret_cast<void *>(ReservedMemory.getBase()), 0xaa, Size),
+      "");
+
+  ReservedMemory.release();
 }
 
 TEST(ScudoMapDeathTest, MapUnmap) {
@@ -36,11 +41,13 @@ TEST(ScudoMapDeathTest, MapUnmap) {
         // Repeat few time to avoid missing crash if it's mmaped by unrelated
         // code.
         for (int i = 0; i < 10; ++i) {
-          void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr);
-          if (!P)
+          scudo::MemMapT MemMap;
+          MemMap.map(/*Addr=*/0U, Size, MappingName);
+          scudo::uptr P = MemMap.getBase();
+          if (P == 0U)
             continue;
-          scudo::unmap(P, Size, 0, nullptr);
-          memset(P, 0xbb, Size);
+          MemMap.unmap(MemMap.getBase(), Size);
+          memset(reinterpret_cast<void *>(P), 0xbb, Size);
         }
       },
       "");
@@ -49,30 +56,36 @@ TEST(ScudoMapDeathTest, MapWithGuardUnmap) {
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   const scudo::uptr Size = 4 * PageSize;
-  scudo::MapPlatformData Data = {};
-  void *P = scudo::map(nullptr, Size + 2 * PageSize, MappingName, MAP_NOACCESS,
-                       &Data);
-  EXPECT_NE(P, nullptr);
-  void *Q =
-      reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize);
-  EXPECT_EQ(scudo::map(Q, Size, MappingName, 0, &Data), Q);
-  memset(Q, 0xaa, Size);
-  EXPECT_DEATH(memset(Q, 0xaa, Size + 1), "");
-  scudo::unmap(P, Size + 2 * PageSize, UNMAP_ALL, &Data);
+  scudo::ReservedMemoryT ReservedMemory;
+  ASSERT_TRUE(
+      ReservedMemory.create(/*Addr=*/0U, Size + 2 * PageSize, MappingName));
+  ASSERT_NE(ReservedMemory.getBase(), 0U);
+
+  scudo::MemMapT MemMap =
+      ReservedMemory.dispatch(ReservedMemory.getBase(), Size + 2 * PageSize);
+  ASSERT_TRUE(MemMap.isAllocated());
+  scudo::uptr Q = MemMap.getBase() + PageSize;
+  ASSERT_TRUE(MemMap.remap(Q, Size, MappingName));
+  memset(reinterpret_cast<void *>(Q), 0xaa, Size);
+  EXPECT_DEATH(memset(reinterpret_cast<void *>(Q), 0xaa, Size + 1), "");
+  MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
 }
 
 TEST(ScudoMapTest, MapGrowUnmap) {
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   const scudo::uptr Size = 4 * PageSize;
-  scudo::MapPlatformData Data = {};
-  void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data);
-  EXPECT_NE(P, nullptr);
-  void *Q =
-      reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(P) + PageSize);
-  EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q);
-  memset(Q, 0xaa, PageSize);
-  Q = reinterpret_cast<void *>(reinterpret_cast<scudo::uptr>(Q) + PageSize);
-  EXPECT_EQ(scudo::map(Q, PageSize, MappingName, 0, &Data), Q);
-  memset(Q, 0xbb, PageSize);
-  scudo::unmap(P, Size, UNMAP_ALL, &Data);
+  scudo::ReservedMemoryT ReservedMemory;
+  ReservedMemory.create(/*Addr=*/0U, Size, MappingName);
+  ASSERT_TRUE(ReservedMemory.isCreated());
+
+  scudo::MemMapT MemMap =
+      ReservedMemory.dispatch(ReservedMemory.getBase(), Size);
+  ASSERT_TRUE(MemMap.isAllocated());
+  scudo::uptr Q = MemMap.getBase() + PageSize;
+  ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName));
+  memset(reinterpret_cast<void *>(Q), 0xaa, PageSize);
+  Q += PageSize;
+  ASSERT_TRUE(MemMap.remap(Q, PageSize, MappingName));
+  memset(reinterpret_cast<void *>(Q), 0xbb, PageSize);
+  MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
 }
diff --git a/third_party/llvm-scudo-standalone/tests/memtag_test.cpp b/third_party/llvm-scudo-standalone/tests/memtag_test.cpp
index 72c9de3..fd277f9 100644
--- a/third_party/llvm-scudo-standalone/tests/memtag_test.cpp
+++ b/third_party/llvm-scudo-standalone/tests/memtag_test.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "common.h"
+#include "mem_map.h"
 #include "memtag.h"
 #include "platform.h"
 #include "tests/scudo_unit_test.h"
@@ -38,27 +39,31 @@ TEST(MemtagBasicDeathTest, Unsupported) {
   EXPECT_DEATH(addFixedTag(nullptr, 0), "not supported");
 }
 
-class MemtagTest : public ::testing::Test {
+class MemtagTest : public Test {
 protected:
   void SetUp() override {
     if (!archSupportsMemoryTagging() ||
         !systemDetectsMemoryTagFaultsTestOnly())
       GTEST_SKIP() << "Memory tagging is not supported";
 
     BufferSize = getPageSizeCached();
-    Buffer = reinterpret_cast<u8 *>(
-        map(nullptr, BufferSize, "MemtagTest", MAP_MEMTAG, &Data));
-    Addr = reinterpret_cast<uptr>(Buffer);
+    ASSERT_FALSE(MemMap.isAllocated());
+    ASSERT_TRUE(MemMap.map(/*Addr=*/0U, BufferSize, "MemtagTest", MAP_MEMTAG));
+    ASSERT_NE(MemMap.getBase(), 0U);
+    Addr = MemMap.getBase();
+    Buffer = reinterpret_cast<u8 *>(Addr);
     EXPECT_TRUE(isAligned(Addr, archMemoryTagGranuleSize()));
     EXPECT_EQ(Addr, untagPointer(Addr));
   }
 
   void TearDown() override {
-    if (Buffer)
-      unmap(Buffer, BufferSize, 0, &Data);
+    if (Buffer) {
+      ASSERT_TRUE(MemMap.isAllocated());
+      MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
+    }
   }
 
   uptr BufferSize = 0;
-  MapPlatformData Data = {};
+  scudo::MemMapT MemMap = {};
   u8 *Buffer = nullptr;
   uptr Addr = 0;
 };
@@ -71,20 +76,24 @@ TEST_F(MemtagTest, ArchMemoryTagGranuleSize) {
 }
 
 TEST_F(MemtagTest, ExtractTag) {
+// The test is already skipped on anything other than 64 bit. But
+// compiling on 32 bit leads to warnings/errors, so skip compiling the test.
+#if defined(__LP64__)
   uptr Tags = 0;
   // Try all value for the top byte and check the tags values are in the
   // expected range.
   for (u64 Top = 0; Top < 0x100; ++Top)
     Tags = Tags | (1u << extractTag(Addr | (Top << 56)));
   EXPECT_EQ(0xffffull, Tags);
+#endif
 }
 
 TEST_F(MemtagDeathTest, AddFixedTag) {
   for (uptr Tag = 0; Tag < 0x10; ++Tag)
     EXPECT_EQ(Tag, extractTag(addFixedTag(Addr, Tag)));
   if (SCUDO_DEBUG) {
-    EXPECT_DEBUG_DEATH(addFixedTag(Addr, 16), "");
-    EXPECT_DEBUG_DEATH(addFixedTag(~Addr, 0), "");
+    EXPECT_DEATH(addFixedTag(Addr, 16), "");
+    EXPECT_DEATH(addFixedTag(~Addr, 0), "");
   }
 }
 
@@ -111,23 +120,35 @@ TEST_F(MemtagTest, SelectRandomTag) {
     uptr Tags = 0;
     for (uptr I = 0; I < 100000; ++I)
       Tags = Tags | (1u << extractTag(selectRandomTag(Ptr, 0)));
-    EXPECT_EQ(0xfffeull, Tags);
+    // std::popcnt is C++20
+    int PopCnt = 0;
+    while (Tags) {
+      PopCnt += Tags & 1;
+      Tags >>= 1;
+    }
+    // Random tags are not always very random, and this test is not about PRNG
+    // quality. Anything above half would be satisfactory.
+    EXPECT_GE(PopCnt, 8);
   }
 }
 
 TEST_F(MemtagTest, SelectRandomTagWithMask) {
+// The test is already skipped on anything other than 64 bit. But
+// compiling on 32 bit leads to warnings/errors, so skip compiling the test.
+#if defined(__LP64__)
   for (uptr j = 0; j < 32; ++j) {
     for (uptr i = 0; i < 1000; ++i)
       EXPECT_NE(j, extractTag(selectRandomTag(Addr, 1ull << j)));
   }
+#endif
 }
 
 TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(LoadStoreTagUnaligned)) {
   for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) {
     if (P % archMemoryTagGranuleSize() == 0)
       continue;
-    EXPECT_DEBUG_DEATH(loadTag(P), "");
-    EXPECT_DEBUG_DEATH(storeTag(P), "");
+    EXPECT_DEATH(loadTag(P), "");
+    EXPECT_DEATH(storeTag(P), "");
   }
 }
 
@@ -148,11 +169,14 @@ TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(StoreTagsUnaligned)) {
     uptr Tagged = addFixedTag(P, 5);
     if (Tagged % archMemoryTagGranuleSize() == 0)
       continue;
-    EXPECT_DEBUG_DEATH(storeTags(Tagged, Tagged), "");
+    EXPECT_DEATH(storeTags(Tagged, Tagged), "");
   }
 }
 
 TEST_F(MemtagTest, StoreTags) {
+// The test is already skipped on anything other than 64 bit. But
+// compiling on 32 bit leads to warnings/errors, so skip compiling the test.
+#if defined(__LP64__)
   const uptr MaxTaggedSize = 4 * archMemoryTagGranuleSize();
   for (uptr Size = 0; Size <= MaxTaggedSize; ++Size) {
     uptr NoTagBegin = Addr + archMemoryTagGranuleSize();
@@ -163,7 +187,7 @@ TEST_F(MemtagTest, StoreTags) {
       uptr TaggedBegin = addFixedTag(NoTagBegin, Tag);
       uptr TaggedEnd = addFixedTag(NoTagEnd, Tag);
-      EXPECT_EQ(roundUpTo(TaggedEnd, archMemoryTagGranuleSize()),
+      EXPECT_EQ(roundUp(TaggedEnd, archMemoryTagGranuleSize()),
                 storeTags(TaggedBegin, TaggedEnd));
       uptr LoadPtr = Addr;
@@ -179,8 +203,9 @@ TEST_F(MemtagTest, StoreTags) {
       EXPECT_EQ(LoadPtr, loadTag(LoadPtr));
     // Reset tags without using StoreTags.
-    releasePagesToOS(Addr, 0, BufferSize, &Data);
+    MemMap.releasePagesToOS(Addr, BufferSize);
   }
+#endif
 }
 
 } // namespace scudo
diff --git a/third_party/llvm-scudo-standalone/tests/mutex_test.cpp b/third_party/llvm-scudo-standalone/tests/mutex_test.cpp
index efee6fe..c3efeab 100644
--- a/third_party/llvm-scudo-standalone/tests/mutex_test.cpp
+++ b/third_party/llvm-scudo-standalone/tests/mutex_test.cpp
@@ -43,7 +43,7 @@ class TestData {
   void backoff() {
     volatile T LocalData[Size] = {};
     for (scudo::u32 I = 0; I < Size; I++) {
-      LocalData[I]++;
+      LocalData[I] = LocalData[I] + 1;
       EXPECT_EQ(LocalData[I], 1U);
     }
   }
@@ -99,3 +99,10 @@ TEST(ScudoMutexTest, MutexTry) {
   for (scudo::u32 I = 0; I < NumberOfThreads; I++)
     pthread_join(Threads[I], 0);
 }
+
+TEST(ScudoMutexTest, MutexAssertHeld) {
+  scudo::HybridMutex M;
+  M.lock();
+  M.assertHeld();
+  M.unlock();
+}
diff --git a/third_party/llvm-scudo-standalone/tests/primary_test.cpp b/third_party/llvm-scudo-standalone/tests/primary_test.cpp
index 5ec4361..1817151 100644
--- a/third_party/llvm-scudo-standalone/tests/primary_test.cpp
+++ b/third_party/llvm-scudo-standalone/tests/primary_test.cpp
@@ -8,12 +8,17 @@
 #include "tests/scudo_unit_test.h"
 
+#include "allocator_config.h"
+#include "condition_variable.h"
 #include "primary32.h"
 #include "primary64.h"
 #include "size_class_map.h"
 
+#include
+#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -22,64 +27,129 @@
 // 32-bit architectures. It's not something we want to encourage, but we still
 // should ensure the tests pass.
 
-struct TestConfig1 {
-  static const scudo::uptr PrimaryRegionSizeLog = 18U;
-  static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
-  static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+template <typename SizeClassMapT> struct TestConfig1 {
   static const bool MaySupportMemoryTagging = false;
-  typedef scudo::uptr PrimaryCompactPtrT;
-  static const scudo::uptr PrimaryCompactPtrScale = 0;
-  static const bool PrimaryEnableRandomOffset = true;
-  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
+    static const scudo::uptr RegionSizeLog = 18U;
+    static const scudo::uptr GroupSizeLog = 18U;
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    typedef scudo::uptr CompactPtrT;
+    static const scudo::uptr CompactPtrScale = 0;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+  };
 };
 
-struct TestConfig2 {
+template <typename SizeClassMapT> struct TestConfig2 {
+  static const bool MaySupportMemoryTagging = false;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
 #if defined(__mips__)
-  // Unable to allocate greater size on QEMU-user.
-  static const scudo::uptr PrimaryRegionSizeLog = 23U;
+    // Unable to allocate greater size on QEMU-user.
+    static const scudo::uptr RegionSizeLog = 23U;
 #else
-  static const scudo::uptr PrimaryRegionSizeLog = 24U;
+    static const scudo::uptr RegionSizeLog = 24U;
 #endif
-  static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
-  static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
-  static const bool MaySupportMemoryTagging = false;
-  typedef scudo::uptr PrimaryCompactPtrT;
-  static const scudo::uptr PrimaryCompactPtrScale = 0;
-  static const bool PrimaryEnableRandomOffset = true;
-  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+    static const scudo::uptr GroupSizeLog = 20U;
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    typedef scudo::uptr CompactPtrT;
+    static const scudo::uptr CompactPtrScale = 0;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+  };
 };
 
-struct TestConfig3 {
+template <typename SizeClassMapT> struct TestConfig3 {
+  static const bool MaySupportMemoryTagging = true;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
 #if defined(__mips__)
-  // Unable to allocate greater size on QEMU-user.
-  static const scudo::uptr PrimaryRegionSizeLog = 23U;
+    // Unable to allocate greater size on QEMU-user.
+    static const scudo::uptr RegionSizeLog = 23U;
 #else
-  static const scudo::uptr PrimaryRegionSizeLog = 24U;
+    static const scudo::uptr RegionSizeLog = 24U;
 #endif
-  static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
-  static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+    static const scudo::uptr GroupSizeLog = 20U;
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    typedef scudo::uptr CompactPtrT;
+    static const scudo::uptr CompactPtrScale = 0;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+  };
+};
+
+template <typename SizeClassMapT> struct TestConfig4 {
   static const bool MaySupportMemoryTagging = true;
-  typedef scudo::uptr PrimaryCompactPtrT;
-  static const scudo::uptr PrimaryCompactPtrScale = 0;
-  static const bool PrimaryEnableRandomOffset = true;
-  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
+#if defined(__mips__)
+    // Unable to allocate greater size on QEMU-user.
+    static const scudo::uptr RegionSizeLog = 23U;
+#else
+    static const scudo::uptr RegionSizeLog = 24U;
+#endif
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    static const scudo::uptr CompactPtrScale = 3U;
+    static const scudo::uptr GroupSizeLog = 20U;
+    typedef scudo::u32 CompactPtrT;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+  };
 };
 
-template <typename BaseConfig, typename SizeClassMapT>
-struct Config : public BaseConfig {
-  using SizeClassMap = SizeClassMapT;
+// This is the only test config that enables the condition variable.
+template <typename SizeClassMapT> struct TestConfig5 {
+  static const bool MaySupportMemoryTagging = true;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
+#if defined(__mips__)
+    // Unable to allocate greater size on QEMU-user.
+    static const scudo::uptr RegionSizeLog = 23U;
+#else
+    static const scudo::uptr RegionSizeLog = 24U;
+#endif
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+    static const scudo::uptr GroupSizeLog = 18U;
+    typedef scudo::u32 CompactPtrT;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+    static const bool UseConditionVariable = true;
+#if SCUDO_LINUX
+    using ConditionVariableT = scudo::ConditionVariableLinux;
+#else
+    using ConditionVariableT = scudo::ConditionVariableDummy;
+#endif
+  };
 };
 
-template
+template