3030
3131namespace svsbenchmark ::vamana {
3232
33- // / Pre-configuration for the linear schedule.
34- struct LinearSchedulePrototype {
35- size_t scale_search_window_;
36- size_t scale_buffer_capacity_;
37- int64_t enable_filter_after_;
38- size_t batch_size_start_;
39- size_t scale_batch_size_;
40- // Whether search should be restarted on every iteration.
41- bool restart_searches_;
42-
43- // /// Saving and Loading.
44- static constexpr std::string_view serialization_schema =
45- " svsbench_vamana_iter_schedule" ;
46- static constexpr svs::lib::Version save_version{0 , 0 , 0 };
47-
48- svs::lib::SaveTable save () const {
49- return svs::lib::SaveTable{
50- serialization_schema,
51- save_version,
52- {SVS_LIST_SAVE_ (scale_search_window),
53- SVS_LIST_SAVE_ (scale_buffer_capacity),
54- SVS_LIST_SAVE_ (enable_filter_after),
55- SVS_LIST_SAVE_ (batch_size_start),
56- SVS_LIST_SAVE_ (scale_batch_size),
57- SVS_LIST_SAVE_ (restart_searches)}};
58- }
59-
60- static LinearSchedulePrototype load (const svs::lib::ContextFreeLoadTable& table) {
61- return LinearSchedulePrototype{
62- SVS_LOAD_MEMBER_AT_ (table, scale_search_window),
63- SVS_LOAD_MEMBER_AT_ (table, scale_buffer_capacity),
64- SVS_LOAD_MEMBER_AT_ (table, enable_filter_after),
65- SVS_LOAD_MEMBER_AT_ (table, batch_size_start),
66- SVS_LOAD_MEMBER_AT_ (table, scale_batch_size),
67- SVS_LOAD_MEMBER_AT_ (table, restart_searches)};
68- }
69-
70- // Return several representative examples for the schedule.
71- static std::vector<LinearSchedulePrototype> examples () {
72- return {{10 , 20 , -1 , 10 , 0 , false }, {10 , 10 , 3 , 10 , 5 , false }};
73- }
74-
75- // Should search be restarted from scratch every iteration.
76- bool restart_every_iteration () const { return restart_searches_; }
77-
78- // Materialize an actual schedule given a set of base parameters.
79- // NOTE: This does not propagate the `restart_searches_` flag.
80- svs::index::vamana::LinearSchedule
81- materialize (const svs::index::vamana::VamanaSearchParameters& sp) const {
82- return svs::index::vamana::LinearSchedule{
83- sp,
84- svs::lib::narrow<uint16_t >(scale_search_window_),
85- svs::lib::narrow<uint16_t >(scale_buffer_capacity_),
86- svs::lib::narrow<int16_t >(enable_filter_after_),
87- svs::lib::narrow<uint16_t >(batch_size_start_),
88- svs::lib::narrow<uint16_t >(scale_batch_size_)};
89- }
90- };
91-
9233struct IteratorSearchParameters {
9334 public:
9435 // /// Members
95- // The schedules to try .
96- std::vector<LinearSchedulePrototype> schedules_ ;
36+ // Batch sizes to use for the iterator .
37+ std::vector<size_t > batch_sizes_{{ 10 , 20 }} ;
9738 // target recalls relative to base number of neighbors.
9839 std::vector<svs::lib::Percent> target_recalls_;
9940 // The number of batches to yield.
@@ -108,7 +49,7 @@ struct IteratorSearchParameters {
10849
10950 static IteratorSearchParameters example () {
11051 return IteratorSearchParameters{
111- .schedules_ = LinearSchedulePrototype::examples () ,
52+ .batch_sizes_ = { 10 } ,
11253 .target_recalls_ = {svs::lib::Percent (0.9 )},
11354 .num_batches_ = 5 ,
11455 .query_subsample_ = 10 ,
@@ -119,15 +60,15 @@ struct IteratorSearchParameters {
11960 return svs::lib::SaveTable{
12061 serialization_schema,
12162 save_version,
122- {SVS_LIST_SAVE_ (schedules ),
63+ {SVS_LIST_SAVE_ (batch_sizes ),
12364 SVS_LIST_SAVE_ (target_recalls),
12465 SVS_LIST_SAVE_ (num_batches),
12566 SVS_LIST_SAVE_ (query_subsample)}};
12667 }
12768
12869 static IteratorSearchParameters load (const svs::lib::ContextFreeLoadTable& table) {
12970 return IteratorSearchParameters{
130- SVS_LOAD_MEMBER_AT_ (table, schedules ),
71+ SVS_LOAD_MEMBER_AT_ (table, batch_sizes ),
13172 SVS_LOAD_MEMBER_AT_ (table, target_recalls),
13273 SVS_LOAD_MEMBER_AT_ (table, num_batches),
13374 SVS_LOAD_MEMBER_AT_ (table, query_subsample)};
@@ -279,30 +220,25 @@ struct YieldedResult {
279220
280221// TODO: Make the dependence on `Report` looser.
281222template <typename Index> struct QueryIteratorResult {
282- LinearSchedulePrototype schedule_ ;
223+ size_t batch_size_ ;
283224 size_t num_batches_;
284225 double target_recall_;
285226 search::RunReport<Index> report_;
286- // The search parameters used for each iteration.
287- // Must be the same for all queries in the batch.
288- std::vector<svs::index::vamana::VamanaSearchParameters> iteration_parameters_;
289227 // Outer vector: Results for each query.
290228 // Inner vector: Results within a query.
291229 std::vector<std::vector<YieldedResult>> results_;
292230
293231 // /// Constructor
294232 QueryIteratorResult (
295- const LinearSchedulePrototype& schedule ,
233+ size_t batch_size ,
296234 double target_recall,
297235 search::RunReport<Index> report,
298- std::vector<svs::index::vamana::VamanaSearchParameters> iteration_parameters,
299236 std::vector<std::vector<YieldedResult>> results
300237 )
301- : schedule_{schedule }
302- , num_batches_{iteration_parameters .size ()}
238+ : batch_size_{batch_size }
239+ , num_batches_{results. at ( 0 ) .size ()}
303240 , target_recall_{target_recall}
304241 , report_{std::move (report)}
305- , iteration_parameters_{std::move (iteration_parameters)}
306242 , results_{std::move (results)} {
307243 // Ensure all the yielded results have the correct size.
308244 for (size_t i = 0 , imax = results_.size (); i < imax; ++i) {
@@ -326,11 +262,10 @@ template <typename Index> struct QueryIteratorResult {
326262 return svs::lib::SaveTable{
327263 serialization_schema,
328264 save_version,
329- {SVS_LIST_SAVE_ (schedule ),
265+ {SVS_LIST_SAVE_ (batch_size ),
330266 SVS_LIST_SAVE_ (num_batches),
331267 SVS_LIST_SAVE_ (target_recall),
332268 SVS_LIST_SAVE_ (report),
333- SVS_LIST_SAVE_ (iteration_parameters),
334269 SVS_LIST_SAVE_ (results)}};
335270 }
336271};
@@ -360,15 +295,14 @@ std::vector<QueryIteratorResult<Index>> tune_and_search_iterator(
360295
361296 // Loop over each batchsize.
362297 auto query_iterator_results = std::vector<QueryIteratorResult<Index>>{};
363- for (const auto & schedule : parameters.schedules_ ) {
364- auto initial_batch_size = schedule.batch_size_start_ ;
298+ for (const auto & batch_size : parameters.batch_sizes_ ) {
365299 for (auto target_recall : parameters.target_recalls_ ) {
366300 // Calibrate the index for the given recall.
367301 auto config = traits::calibrate (
368302 index,
369303 query_set.training_set_ ,
370304 query_set.training_set_groundtruth_ ,
371- initial_batch_size ,
305+ batch_size ,
372306 target_recall.value (),
373307 context,
374308 extra
@@ -379,7 +313,7 @@ std::vector<QueryIteratorResult<Index>> tune_and_search_iterator(
379313 index,
380314 query_set.test_set_ ,
381315 query_set.test_set_groundtruth_ ,
382- initial_batch_size ,
316+ batch_size ,
383317 target_recall.value (),
384318 svsbenchmark::CalibrateContext::TestSetTune,
385319 config,
@@ -389,7 +323,7 @@ std::vector<QueryIteratorResult<Index>> tune_and_search_iterator(
389323 // Now we have a calibrated configuration - obtain a baseline report for
390324 // searching with this batchsize.
391325 auto report = svsbenchmark::search::search_with_config (
392- index, config, query_test, groundtruth_test, initial_batch_size
326+ index, config, query_test, groundtruth_test, batch_size
393327 );
394328
395329 // `result_buffer`: All results that have been returned by the iterator.
@@ -452,38 +386,34 @@ std::vector<QueryIteratorResult<Index>> tune_and_search_iterator(
452386
453387 // The first call to `iterator` kick-starts graph search.
454388 auto tic = svs::lib::now ();
455- auto iterator = make_iterator (index, query, config, schedule);
389+ auto iterator = make_iterator (index, query);
390+ iterator.next (config.buffer_config_ .get_search_window_size ());
456391 auto elapsed = svs::lib::time_difference (tic);
457392 if (i == 0 ) {
458- iteration_parameters.push_back (iterator.parameters_for_current_batch ());
393+ iteration_parameters.push_back (
394+ iterator.parameters_for_current_iteration ()
395+ );
459396 }
460397
461398 timings_for_this_query.push_back (tally (iterator, i, 0 , elapsed));
462399 for (size_t j = 0 ; j < parameters.num_batches_ ; ++j) {
463- // If requested by the parent schedule, reset search for this
464- // iteration.
465- if (schedule.restart_every_iteration ()) {
466- iterator.restart_next_search ();
467- }
468-
469400 tic = svs::lib::now ();
470- iterator.next ();
401+ iterator.next (batch_size );
471402 elapsed = svs::lib::time_difference (tic);
472403 timings_for_this_query.push_back (tally (iterator, i, j + 1 , elapsed));
473404 if (i == 0 ) {
474405 iteration_parameters.push_back (
475- iterator.parameters_for_current_batch ()
406+ iterator.parameters_for_current_iteration ()
476407 );
477408 }
478409 }
479410 }
480411
481412 // Finish up summarizing these results.
482413 query_iterator_results.emplace_back (
483- schedule ,
414+ batch_size ,
484415 target_recall.value (),
485416 std::move (report),
486- std::move (iteration_parameters),
487417 std::move (yielded_results)
488418 );
489419 do_checkpoint (query_iterator_results);
@@ -522,9 +452,7 @@ toml::table tune_and_search_iterator(
522452 job.parameters_ ,
523453 query_set,
524454 svsbenchmark::CalibrateContext::InitialTrainingSet,
525- [](const auto & index, const auto & query, const auto & config, const auto & schedule) {
526- return index.batch_iterator (query, schedule.materialize (config));
527- },
455+ [](const auto & index, const auto & query) { return index.batch_iterator (query); },
528456 do_checkpoint,
529457 svsbenchmark::IndexTraits<Index>::regression_optimization ()
530458 );
0 commit comments