Skip to content

Commit 35c2253

Browse files
committed
Add MMapAllocator for SSD-backed memory-mapped data allocation
Add MMapAllocator that allocates memory via mmap-backed files on a configurable path (e.g. NVMe/SSD). This enables placing secondary data structures on SSD while keeping primary data in RAM, reducing heap memory usage for large-scale vector search. - include/svs/core/allocator_mmap.h: MMapAllocator with configurable base path, access hints (Sequential/Random/Normal), and automatic file cleanup on deallocation. - tests/svs/core/test_allocator_mmap.cpp: Unit tests for allocation, deallocation, file creation, and access hint propagation. - tests/CMakeLists.txt: Register mmap allocator test.
1 parent 4c831ff commit 35c2253

3 files changed

Lines changed: 490 additions & 0 deletions

File tree

include/svs/core/allocator_mmap.h

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
/*
2+
* Copyright 2026 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include "svs/core/allocator.h"
#include "svs/lib/exception.h"
#include "svs/lib/memory.h"

#include "fmt/core.h"
#include "tsl/robin_map.h"

#include <atomic>
#include <filesystem>
#include <limits>
#include <memory>
#include <mutex>
#include <new>
#include <string>
#include <sys/mman.h>
#include <system_error>
#include <thread>
32+
33+
namespace svs {
34+
35+
namespace detail {
36+
37+
///
38+
/// @brief Manager for file-backed memory mapped allocations
39+
///
40+
/// Tracks memory-mapped allocations by keeping MMapPtr objects alive.
41+
/// Thread-safe for concurrent allocations.
42+
///
43+
class MMapAllocationManager {
44+
public:
45+
MMapAllocationManager() = default;
46+
47+
///
48+
/// @brief Allocate memory mapped to a file
49+
///
50+
/// @param bytes Number of bytes to allocate
51+
/// @param file_path Path to the file for backing storage
52+
/// @return Pointer to the allocated memory
53+
///
54+
[[nodiscard]] void* allocate(size_t bytes, const std::filesystem::path& file_path) {
55+
MemoryMapper mapper{MemoryMapper::ReadWrite, MemoryMapper::MayCreate};
56+
auto mmap_ptr = mapper.mmap(file_path, lib::Bytes(bytes));
57+
58+
void* ptr = mmap_ptr.data();
59+
60+
// Store the MMapPtr to keep the mapping alive
61+
{
62+
std::lock_guard lock{mutex_};
63+
allocations_.insert({ptr, std::move(mmap_ptr)});
64+
}
65+
66+
return ptr;
67+
}
68+
69+
///
70+
/// @brief Deallocate memory mapped allocation
71+
///
72+
/// Removes the MMapPtr, which triggers munmap in its destructor
73+
///
74+
/// @param ptr Pointer to deallocate
75+
///
76+
static void deallocate(void* ptr) {
77+
std::lock_guard lock{mutex_};
78+
auto itr = allocations_.find(ptr);
79+
if (itr == allocations_.end()) {
80+
throw ANNEXCEPTION("Could not find memory-mapped allocation to deallocate!");
81+
}
82+
83+
// Erasing will destroy the MMapPtr, which calls munmap
84+
allocations_.erase(itr);
85+
}
86+
87+
///
88+
/// @brief Get count of current allocations (for debugging/testing)
89+
///
90+
static size_t allocation_count() {
91+
std::lock_guard lock{mutex_};
92+
return allocations_.size();
93+
}
94+
95+
private:
96+
inline static std::mutex mutex_{};
97+
inline static tsl::robin_map<void*, MMapPtr<void>> allocations_{};
98+
};
99+
100+
} // namespace detail
101+
102+
///
103+
/// @brief File-backed memory-mapped allocator for LeanVec secondary data
104+
///
105+
/// This allocator uses memory-mapped files to store data on SSD rather than RAM.
106+
/// It's particularly useful for the secondary (full-dimension) dataset in LeanVec,
107+
/// which is accessed less frequently during search.
108+
///
109+
/// @tparam T The value type for the allocator
110+
///
111+
///
112+
/// @brief Access pattern hint for memory-mapped allocations
113+
///
114+
enum class MMapAccessHint {
115+
Normal, ///< Default access pattern
116+
Sequential, ///< Data will be accessed sequentially
117+
Random ///< Data will be accessed randomly
118+
};
119+
120+
template <typename T> class MMapAllocator {
121+
private:
122+
std::filesystem::path base_path_;
123+
size_t allocation_counter_ = 0;
124+
MMapAccessHint access_hint_ = MMapAccessHint::Normal;
125+
126+
public:
127+
// C++ allocator type aliases
128+
using value_type = T;
129+
using propagate_on_container_copy_assignment = std::true_type;
130+
using propagate_on_container_move_assignment = std::true_type;
131+
using propagate_on_container_swap = std::true_type;
132+
using is_always_equal =
133+
std::false_type; // Allocators with different paths are different
134+
135+
///
136+
/// @brief Construct a new MMapAllocator
137+
///
138+
/// @param base_path Directory path for storing memory-mapped files.
139+
/// If empty, will use /tmp with generated names.
140+
/// @param access_hint Hint about how the data will be accessed
141+
///
142+
explicit MMapAllocator(
143+
std::filesystem::path base_path = {},
144+
MMapAccessHint access_hint = MMapAccessHint::Normal
145+
)
146+
: base_path_{std::move(base_path)}
147+
, access_hint_{access_hint} {
148+
if (!base_path_.empty() && !std::filesystem::exists(base_path_)) {
149+
std::filesystem::create_directories(base_path_);
150+
}
151+
}
152+
153+
// Enable rebinding of allocators
154+
template <typename U> friend class MMapAllocator;
155+
156+
template <typename U>
157+
MMapAllocator(const MMapAllocator<U>& other)
158+
: base_path_{other.base_path_}
159+
, allocation_counter_{other.allocation_counter_}
160+
, access_hint_{other.access_hint_} {}
161+
162+
///
163+
/// @brief Compare allocators
164+
///
165+
/// Two allocators are equal if they use the same base path and access hint
166+
///
167+
template <typename U> bool operator==(const MMapAllocator<U>& other) const {
168+
return base_path_ == other.base_path_ && access_hint_ == other.access_hint_;
169+
}
170+
171+
///
172+
/// @brief Allocate memory
173+
///
174+
/// Creates a memory-mapped file and returns a pointer to it.
175+
/// Applies madvise hints based on the access hint.
176+
///
177+
/// @param n Number of elements to allocate
178+
/// @return Pointer to allocated memory
179+
///
180+
[[nodiscard]] T* allocate(size_t n) {
181+
size_t bytes = sizeof(T) * n;
182+
183+
// Generate unique file path
184+
auto file_path = generate_file_path(bytes);
185+
186+
void* ptr = detail::MMapAllocationManager{}.allocate(bytes, file_path);
187+
188+
// Apply madvise hint if on Linux
189+
apply_access_hint(ptr, bytes);
190+
191+
return static_cast<T*>(ptr);
192+
}
193+
194+
///
195+
/// @brief Deallocate memory
196+
///
197+
/// Unmaps the memory-mapped file and cleans up.
198+
///
199+
/// @param ptr Pointer to deallocate
200+
/// @param n Number of elements (unused but required by allocator interface)
201+
///
202+
void deallocate(void* ptr, size_t SVS_UNUSED(n)) {
203+
detail::MMapAllocationManager::deallocate(ptr);
204+
}
205+
206+
///
207+
/// @brief Construct an object
208+
///
209+
/// Performs default initialization of the object.
210+
///
211+
void construct(T* ptr) { ::new (static_cast<void*>(ptr)) T; }
212+
213+
///
214+
/// @brief Get the base path for allocations
215+
///
216+
const std::filesystem::path& get_base_path() const { return base_path_; }
217+
218+
///
219+
/// @brief Get the access hint
220+
///
221+
MMapAccessHint get_access_hint() const { return access_hint_; }
222+
223+
///
224+
/// @brief Set the access hint for future allocations
225+
///
226+
void set_access_hint(MMapAccessHint hint) { access_hint_ = hint; }
227+
228+
private:
229+
///
230+
/// @brief Apply madvise hint based on access pattern
231+
///
232+
void apply_access_hint(void* ptr, size_t bytes) const {
233+
#ifdef __linux__
234+
if (ptr == nullptr || bytes == 0) {
235+
return;
236+
}
237+
238+
int advice = MADV_NORMAL;
239+
switch (access_hint_) {
240+
case MMapAccessHint::Normal:
241+
advice = MADV_NORMAL;
242+
break;
243+
case MMapAccessHint::Sequential:
244+
advice = MADV_SEQUENTIAL;
245+
break;
246+
case MMapAccessHint::Random:
247+
advice = MADV_RANDOM;
248+
break;
249+
}
250+
251+
// madvise is a hint, so ignore errors
252+
(void)madvise(ptr, bytes, advice);
253+
#else
254+
(void)ptr;
255+
(void)bytes;
256+
#endif
257+
}
258+
///
259+
/// @brief Generate a unique file path for an allocation
260+
///
261+
std::filesystem::path generate_file_path(size_t bytes) {
262+
auto filename = fmt::format(
263+
"mmap_alloc_{}_{}_{}.dat",
264+
std::this_thread::get_id(),
265+
allocation_counter_++,
266+
bytes
267+
);
268+
269+
if (base_path_.empty()) {
270+
return std::filesystem::temp_directory_path() / filename;
271+
}
272+
return base_path_ / filename;
273+
}
274+
};
275+
276+
} // namespace svs

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ set(TEST_SOURCES
102102
${TEST_DIR}/svs/concepts/distance.cpp
103103
# Core
104104
${TEST_DIR}/svs/core/allocator.cpp
105+
${TEST_DIR}/svs/core/allocator_mmap.cpp
105106
${TEST_DIR}/svs/core/compact.cpp
106107
${TEST_DIR}/svs/core/data.cpp
107108
${TEST_DIR}/svs/core/data/block.cpp

0 commit comments

Comments
 (0)