Skip to content

Commit c090f55

Browse files
committed
Add MMapAllocator for SSD-backed memory-mapped data allocation
Add MMapAllocator that allocates memory via mmap-backed files on a configurable path (e.g. NVMe/SSD). This enables placing secondary data structures on SSD while keeping primary data in RAM, reducing heap memory usage for large-scale vector search.
- include/svs/core/allocator_mmap.h: MMapAllocator with configurable base path, access hints (Sequential/Random/Normal), and automatic file cleanup on deallocation.
- tests/svs/core/test_allocator_mmap.cpp: Unit tests for allocation, deallocation, file creation, and access hint propagation.
- tests/CMakeLists.txt: Register mmap allocator test.
1 parent 4c831ff commit c090f55

3 files changed

Lines changed: 489 additions & 0 deletions

File tree

include/svs/core/allocator_mmap.h

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
/*
2+
* Copyright 2026 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include "svs/core/allocator.h"
20+
#include "svs/lib/exception.h"
21+
#include "svs/lib/memory.h"
22+
23+
#include "tsl/robin_map.h"
24+
#include "fmt/core.h"
25+
26+
#include <filesystem>
27+
#include <memory>
28+
#include <mutex>
29+
#include <string>
30+
#include <thread>
31+
#include <sys/mman.h>
32+
33+
namespace svs {
34+
35+
namespace detail {
36+
37+
///
38+
/// @brief Manager for file-backed memory mapped allocations
39+
///
40+
/// Tracks memory-mapped allocations by keeping MMapPtr objects alive.
41+
/// Thread-safe for concurrent allocations.
42+
///
43+
class MMapAllocationManager {
44+
public:
45+
MMapAllocationManager() = default;
46+
47+
///
48+
/// @brief Allocate memory mapped to a file
49+
///
50+
/// @param bytes Number of bytes to allocate
51+
/// @param file_path Path to the file for backing storage
52+
/// @return Pointer to the allocated memory
53+
///
54+
[[nodiscard]] void* allocate(size_t bytes, const std::filesystem::path& file_path) {
55+
MemoryMapper mapper{MemoryMapper::ReadWrite, MemoryMapper::MayCreate};
56+
auto mmap_ptr = mapper.mmap(file_path, lib::Bytes(bytes));
57+
58+
void* ptr = mmap_ptr.data();
59+
60+
// Store the MMapPtr to keep the mapping alive
61+
{
62+
std::lock_guard lock{mutex_};
63+
allocations_.insert({ptr, std::move(mmap_ptr)});
64+
}
65+
66+
return ptr;
67+
}
68+
69+
///
70+
/// @brief Deallocate memory mapped allocation
71+
///
72+
/// Removes the MMapPtr, which triggers munmap in its destructor
73+
///
74+
/// @param ptr Pointer to deallocate
75+
///
76+
static void deallocate(void* ptr) {
77+
std::lock_guard lock{mutex_};
78+
auto itr = allocations_.find(ptr);
79+
if (itr == allocations_.end()) {
80+
throw ANNEXCEPTION("Could not find memory-mapped allocation to deallocate!");
81+
}
82+
83+
// Erasing will destroy the MMapPtr, which calls munmap
84+
allocations_.erase(itr);
85+
}
86+
87+
///
88+
/// @brief Get count of current allocations (for debugging/testing)
89+
///
90+
static size_t allocation_count() {
91+
std::lock_guard lock{mutex_};
92+
return allocations_.size();
93+
}
94+
95+
private:
96+
inline static std::mutex mutex_{};
97+
inline static tsl::robin_map<void*, MMapPtr<void>> allocations_{};
98+
};
99+
100+
} // namespace detail
101+
102+
///
103+
/// @brief File-backed memory-mapped allocator for LeanVec secondary data
104+
///
105+
/// This allocator uses memory-mapped files to store data on SSD rather than RAM.
106+
/// It's particularly useful for the secondary (full-dimension) dataset in LeanVec,
107+
/// which is accessed less frequently during search.
108+
///
109+
/// @tparam T The value type for the allocator
110+
///
111+
///
112+
/// @brief Access pattern hint for memory-mapped allocations
113+
///
114+
enum class MMapAccessHint {
115+
Normal, ///< Default access pattern
116+
Sequential, ///< Data will be accessed sequentially
117+
Random ///< Data will be accessed randomly
118+
};
119+
120+
template <typename T> class MMapAllocator {
121+
private:
122+
std::filesystem::path base_path_;
123+
size_t allocation_counter_ = 0;
124+
MMapAccessHint access_hint_ = MMapAccessHint::Normal;
125+
126+
public:
127+
// C++ allocator type aliases
128+
using value_type = T;
129+
using propagate_on_container_copy_assignment = std::true_type;
130+
using propagate_on_container_move_assignment = std::true_type;
131+
using propagate_on_container_swap = std::true_type;
132+
using is_always_equal = std::false_type; // Allocators with different paths are different
133+
134+
///
135+
/// @brief Construct a new MMapAllocator
136+
///
137+
/// @param base_path Directory path for storing memory-mapped files.
138+
/// If empty, will use /tmp with generated names.
139+
/// @param access_hint Hint about how the data will be accessed
140+
///
141+
explicit MMapAllocator(
142+
std::filesystem::path base_path = {},
143+
MMapAccessHint access_hint = MMapAccessHint::Normal
144+
)
145+
: base_path_{std::move(base_path)}
146+
, access_hint_{access_hint} {
147+
if (!base_path_.empty() && !std::filesystem::exists(base_path_)) {
148+
std::filesystem::create_directories(base_path_);
149+
}
150+
}
151+
152+
// Enable rebinding of allocators
153+
template <typename U> friend class MMapAllocator;
154+
155+
template <typename U>
156+
MMapAllocator(const MMapAllocator<U>& other)
157+
: base_path_{other.base_path_}
158+
, allocation_counter_{other.allocation_counter_}
159+
, access_hint_{other.access_hint_} {}
160+
161+
///
162+
/// @brief Compare allocators
163+
///
164+
/// Two allocators are equal if they use the same base path and access hint
165+
///
166+
template <typename U> bool operator==(const MMapAllocator<U>& other) const {
167+
return base_path_ == other.base_path_ && access_hint_ == other.access_hint_;
168+
}
169+
170+
///
171+
/// @brief Allocate memory
172+
///
173+
/// Creates a memory-mapped file and returns a pointer to it.
174+
/// Applies madvise hints based on the access hint.
175+
///
176+
/// @param n Number of elements to allocate
177+
/// @return Pointer to allocated memory
178+
///
179+
[[nodiscard]] T* allocate(size_t n) {
180+
size_t bytes = sizeof(T) * n;
181+
182+
// Generate unique file path
183+
auto file_path = generate_file_path(bytes);
184+
185+
void* ptr = detail::MMapAllocationManager{}.allocate(bytes, file_path);
186+
187+
// Apply madvise hint if on Linux
188+
apply_access_hint(ptr, bytes);
189+
190+
return static_cast<T*>(ptr);
191+
}
192+
193+
///
194+
/// @brief Deallocate memory
195+
///
196+
/// Unmaps the memory-mapped file and cleans up.
197+
///
198+
/// @param ptr Pointer to deallocate
199+
/// @param n Number of elements (unused but required by allocator interface)
200+
///
201+
void deallocate(void* ptr, size_t SVS_UNUSED(n)) {
202+
detail::MMapAllocationManager::deallocate(ptr);
203+
}
204+
205+
///
206+
/// @brief Construct an object
207+
///
208+
/// Performs default initialization of the object.
209+
///
210+
void construct(T* ptr) { ::new (static_cast<void*>(ptr)) T; }
211+
212+
///
213+
/// @brief Get the base path for allocations
214+
///
215+
const std::filesystem::path& get_base_path() const { return base_path_; }
216+
217+
///
218+
/// @brief Get the access hint
219+
///
220+
MMapAccessHint get_access_hint() const { return access_hint_; }
221+
222+
///
223+
/// @brief Set the access hint for future allocations
224+
///
225+
void set_access_hint(MMapAccessHint hint) { access_hint_ = hint; }
226+
227+
private:
228+
///
229+
/// @brief Apply madvise hint based on access pattern
230+
///
231+
void apply_access_hint(void* ptr, size_t bytes) const {
232+
#ifdef __linux__
233+
if (ptr == nullptr || bytes == 0) {
234+
return;
235+
}
236+
237+
int advice = MADV_NORMAL;
238+
switch (access_hint_) {
239+
case MMapAccessHint::Normal:
240+
advice = MADV_NORMAL;
241+
break;
242+
case MMapAccessHint::Sequential:
243+
advice = MADV_SEQUENTIAL;
244+
break;
245+
case MMapAccessHint::Random:
246+
advice = MADV_RANDOM;
247+
break;
248+
}
249+
250+
// madvise is a hint, so ignore errors
251+
(void)madvise(ptr, bytes, advice);
252+
#else
253+
(void)ptr;
254+
(void)bytes;
255+
#endif
256+
}
257+
///
258+
/// @brief Generate a unique file path for an allocation
259+
///
260+
std::filesystem::path generate_file_path(size_t bytes) {
261+
auto filename = fmt::format(
262+
"mmap_alloc_{}_{}_{}.dat",
263+
std::this_thread::get_id(),
264+
allocation_counter_++,
265+
bytes
266+
);
267+
268+
if (base_path_.empty()) {
269+
return std::filesystem::temp_directory_path() / filename;
270+
}
271+
return base_path_ / filename;
272+
}
273+
};
274+
275+
} // namespace svs

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ set(TEST_SOURCES
102102
${TEST_DIR}/svs/concepts/distance.cpp
103103
# Core
104104
${TEST_DIR}/svs/core/allocator.cpp
105+
${TEST_DIR}/svs/core/allocator_mmap.cpp
105106
${TEST_DIR}/svs/core/compact.cpp
106107
${TEST_DIR}/svs/core/data.cpp
107108
${TEST_DIR}/svs/core/data/block.cpp

0 commit comments

Comments
 (0)