|
| 1 | +/* |
| 2 | + * Copyright 2026 Intel Corporation |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +#pragma once |
| 18 | + |
#include "svs/core/allocator.h"
#include "svs/lib/exception.h"
#include "svs/lib/memory.h"

#include "fmt/core.h"
#include "tsl/robin_map.h"

#include <atomic>
#include <filesystem>
#include <limits>
#include <memory>
#include <mutex>
#include <new>
#include <string>
#include <thread>

#include <sys/mman.h>
| 32 | + |
| 33 | +namespace svs { |
| 34 | + |
| 35 | +namespace detail { |
| 36 | + |
| 37 | +/// |
| 38 | +/// @brief Manager for file-backed memory mapped allocations |
| 39 | +/// |
| 40 | +/// Tracks memory-mapped allocations by keeping MMapPtr objects alive. |
| 41 | +/// Thread-safe for concurrent allocations. |
| 42 | +/// |
| 43 | +class MMapAllocationManager { |
| 44 | + public: |
| 45 | + MMapAllocationManager() = default; |
| 46 | + |
| 47 | + /// |
| 48 | + /// @brief Allocate memory mapped to a file |
| 49 | + /// |
| 50 | + /// @param bytes Number of bytes to allocate |
| 51 | + /// @param file_path Path to the file for backing storage |
| 52 | + /// @return Pointer to the allocated memory |
| 53 | + /// |
| 54 | + [[nodiscard]] void* allocate(size_t bytes, const std::filesystem::path& file_path) { |
| 55 | + MemoryMapper mapper{MemoryMapper::ReadWrite, MemoryMapper::MayCreate}; |
| 56 | + auto mmap_ptr = mapper.mmap(file_path, lib::Bytes(bytes)); |
| 57 | + |
| 58 | + void* ptr = mmap_ptr.data(); |
| 59 | + |
| 60 | + // Store the MMapPtr to keep the mapping alive |
| 61 | + { |
| 62 | + std::lock_guard lock{mutex_}; |
| 63 | + allocations_.insert({ptr, std::move(mmap_ptr)}); |
| 64 | + } |
| 65 | + |
| 66 | + return ptr; |
| 67 | + } |
| 68 | + |
| 69 | + /// |
| 70 | + /// @brief Deallocate memory mapped allocation |
| 71 | + /// |
| 72 | + /// Removes the MMapPtr, which triggers munmap in its destructor |
| 73 | + /// |
| 74 | + /// @param ptr Pointer to deallocate |
| 75 | + /// |
| 76 | + static void deallocate(void* ptr) { |
| 77 | + std::lock_guard lock{mutex_}; |
| 78 | + auto itr = allocations_.find(ptr); |
| 79 | + if (itr == allocations_.end()) { |
| 80 | + throw ANNEXCEPTION("Could not find memory-mapped allocation to deallocate!"); |
| 81 | + } |
| 82 | + |
| 83 | + // Erasing will destroy the MMapPtr, which calls munmap |
| 84 | + allocations_.erase(itr); |
| 85 | + } |
| 86 | + |
| 87 | + /// |
| 88 | + /// @brief Get count of current allocations (for debugging/testing) |
| 89 | + /// |
| 90 | + static size_t allocation_count() { |
| 91 | + std::lock_guard lock{mutex_}; |
| 92 | + return allocations_.size(); |
| 93 | + } |
| 94 | + |
| 95 | + private: |
| 96 | + inline static std::mutex mutex_{}; |
| 97 | + inline static tsl::robin_map<void*, MMapPtr<void>> allocations_{}; |
| 98 | +}; |
| 99 | + |
| 100 | +} // namespace detail |
| 101 | + |
///
/// @class MMapAllocator
/// @brief File-backed memory-mapped allocator for LeanVec secondary data
///
/// This allocator uses memory-mapped files to store data on SSD rather than RAM.
/// It's particularly useful for the secondary (full-dimension) dataset in LeanVec,
/// which is accessed less frequently during search.
///
/// @tparam T The value type for the allocator
///

///
/// @brief Expected access pattern of a memory-mapped allocation
///
enum class MMapAccessHint {
    /// No particular pattern is expected (the default)
    Normal = 0,
    /// The data will be read or written in sequential order
    Sequential = 1,
    /// The data will be touched in random order
    Random = 2
};
| 119 | + |
| 120 | +template <typename T> class MMapAllocator { |
| 121 | + private: |
| 122 | + std::filesystem::path base_path_; |
| 123 | + size_t allocation_counter_ = 0; |
| 124 | + MMapAccessHint access_hint_ = MMapAccessHint::Normal; |
| 125 | + |
| 126 | + public: |
| 127 | + // C++ allocator type aliases |
| 128 | + using value_type = T; |
| 129 | + using propagate_on_container_copy_assignment = std::true_type; |
| 130 | + using propagate_on_container_move_assignment = std::true_type; |
| 131 | + using propagate_on_container_swap = std::true_type; |
| 132 | + using is_always_equal = std::false_type; // Allocators with different paths are different |
| 133 | + |
| 134 | + /// |
| 135 | + /// @brief Construct a new MMapAllocator |
| 136 | + /// |
| 137 | + /// @param base_path Directory path for storing memory-mapped files. |
| 138 | + /// If empty, will use /tmp with generated names. |
| 139 | + /// @param access_hint Hint about how the data will be accessed |
| 140 | + /// |
| 141 | + explicit MMapAllocator( |
| 142 | + std::filesystem::path base_path = {}, |
| 143 | + MMapAccessHint access_hint = MMapAccessHint::Normal |
| 144 | + ) |
| 145 | + : base_path_{std::move(base_path)} |
| 146 | + , access_hint_{access_hint} { |
| 147 | + if (!base_path_.empty() && !std::filesystem::exists(base_path_)) { |
| 148 | + std::filesystem::create_directories(base_path_); |
| 149 | + } |
| 150 | + } |
| 151 | + |
| 152 | + // Enable rebinding of allocators |
| 153 | + template <typename U> friend class MMapAllocator; |
| 154 | + |
| 155 | + template <typename U> |
| 156 | + MMapAllocator(const MMapAllocator<U>& other) |
| 157 | + : base_path_{other.base_path_} |
| 158 | + , allocation_counter_{other.allocation_counter_} |
| 159 | + , access_hint_{other.access_hint_} {} |
| 160 | + |
| 161 | + /// |
| 162 | + /// @brief Compare allocators |
| 163 | + /// |
| 164 | + /// Two allocators are equal if they use the same base path and access hint |
| 165 | + /// |
| 166 | + template <typename U> bool operator==(const MMapAllocator<U>& other) const { |
| 167 | + return base_path_ == other.base_path_ && access_hint_ == other.access_hint_; |
| 168 | + } |
| 169 | + |
| 170 | + /// |
| 171 | + /// @brief Allocate memory |
| 172 | + /// |
| 173 | + /// Creates a memory-mapped file and returns a pointer to it. |
| 174 | + /// Applies madvise hints based on the access hint. |
| 175 | + /// |
| 176 | + /// @param n Number of elements to allocate |
| 177 | + /// @return Pointer to allocated memory |
| 178 | + /// |
| 179 | + [[nodiscard]] T* allocate(size_t n) { |
| 180 | + size_t bytes = sizeof(T) * n; |
| 181 | + |
| 182 | + // Generate unique file path |
| 183 | + auto file_path = generate_file_path(bytes); |
| 184 | + |
| 185 | + void* ptr = detail::MMapAllocationManager{}.allocate(bytes, file_path); |
| 186 | + |
| 187 | + // Apply madvise hint if on Linux |
| 188 | + apply_access_hint(ptr, bytes); |
| 189 | + |
| 190 | + return static_cast<T*>(ptr); |
| 191 | + } |
| 192 | + |
| 193 | + /// |
| 194 | + /// @brief Deallocate memory |
| 195 | + /// |
| 196 | + /// Unmaps the memory-mapped file and cleans up. |
| 197 | + /// |
| 198 | + /// @param ptr Pointer to deallocate |
| 199 | + /// @param n Number of elements (unused but required by allocator interface) |
| 200 | + /// |
| 201 | + void deallocate(void* ptr, size_t SVS_UNUSED(n)) { |
| 202 | + detail::MMapAllocationManager::deallocate(ptr); |
| 203 | + } |
| 204 | + |
| 205 | + /// |
| 206 | + /// @brief Construct an object |
| 207 | + /// |
| 208 | + /// Performs default initialization of the object. |
| 209 | + /// |
| 210 | + void construct(T* ptr) { ::new (static_cast<void*>(ptr)) T; } |
| 211 | + |
| 212 | + /// |
| 213 | + /// @brief Get the base path for allocations |
| 214 | + /// |
| 215 | + const std::filesystem::path& get_base_path() const { return base_path_; } |
| 216 | + |
| 217 | + /// |
| 218 | + /// @brief Get the access hint |
| 219 | + /// |
| 220 | + MMapAccessHint get_access_hint() const { return access_hint_; } |
| 221 | + |
| 222 | + /// |
| 223 | + /// @brief Set the access hint for future allocations |
| 224 | + /// |
| 225 | + void set_access_hint(MMapAccessHint hint) { access_hint_ = hint; } |
| 226 | + |
| 227 | + private: |
| 228 | + /// |
| 229 | + /// @brief Apply madvise hint based on access pattern |
| 230 | + /// |
| 231 | + void apply_access_hint(void* ptr, size_t bytes) const { |
| 232 | +#ifdef __linux__ |
| 233 | + if (ptr == nullptr || bytes == 0) { |
| 234 | + return; |
| 235 | + } |
| 236 | + |
| 237 | + int advice = MADV_NORMAL; |
| 238 | + switch (access_hint_) { |
| 239 | + case MMapAccessHint::Normal: |
| 240 | + advice = MADV_NORMAL; |
| 241 | + break; |
| 242 | + case MMapAccessHint::Sequential: |
| 243 | + advice = MADV_SEQUENTIAL; |
| 244 | + break; |
| 245 | + case MMapAccessHint::Random: |
| 246 | + advice = MADV_RANDOM; |
| 247 | + break; |
| 248 | + } |
| 249 | + |
| 250 | + // madvise is a hint, so ignore errors |
| 251 | + (void)madvise(ptr, bytes, advice); |
| 252 | +#else |
| 253 | + (void)ptr; |
| 254 | + (void)bytes; |
| 255 | +#endif |
| 256 | + } |
| 257 | + /// |
| 258 | + /// @brief Generate a unique file path for an allocation |
| 259 | + /// |
| 260 | + std::filesystem::path generate_file_path(size_t bytes) { |
| 261 | + auto filename = fmt::format( |
| 262 | + "mmap_alloc_{}_{}_{}.dat", |
| 263 | + std::this_thread::get_id(), |
| 264 | + allocation_counter_++, |
| 265 | + bytes |
| 266 | + ); |
| 267 | + |
| 268 | + if (base_path_.empty()) { |
| 269 | + return std::filesystem::temp_directory_path() / filename; |
| 270 | + } |
| 271 | + return base_path_ / filename; |
| 272 | + } |
| 273 | +}; |
| 274 | + |
| 275 | +} // namespace svs |
0 commit comments