@@ -130,6 +130,9 @@ void heuristic_prune_neighbors(
130130
131131 auto pruned = std::vector<PruneState>(poolsize, PruneState::Available);
132132 float current_alpha = 1 .0f ;
133+ float anchor_dist = 0 .0f ;
134+ bool anchor_set = false ;
135+ bool all_duplicates = true ;
133136 while (result.size () < max_result_size && !cmp (alpha, current_alpha)) {
134137 size_t start = 0 ;
135138 while (result.size () < max_result_size && start < poolsize) {
@@ -145,6 +148,16 @@ void heuristic_prune_neighbors(
145148 const auto & query = accessor (dataset, id);
146149 distance::maybe_fix_argument (distance_function, query);
147150 result.push_back (detail::construct_as (lib::Type<I>(), pool[start]));
151+
152+ if (all_duplicates) {
153+ if (!anchor_set) {
154+ anchor_dist = pool[start].distance ();
155+ anchor_set = true ;
156+ } else if (pool[start].distance () != anchor_dist) {
157+ all_duplicates = false ;
158+ }
159+ }
160+
148161 for (size_t t = start + 1 ; t < poolsize; ++t) {
149162 if (excluded (pruned[t])) {
150163 continue ;
@@ -171,6 +184,40 @@ void heuristic_prune_neighbors(
171184 }
172185 current_alpha *= alpha;
173186 }
187+
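188+ // Duplicate cluster (every selected neighbor has the anchor distance): overwrite the last selected neighbor with the first pool candidate at a different distance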
189+ if (all_duplicates && anchor_set && !result.empty ()) {
190+ auto result_id = [](const I& r) -> size_t {
191+ if constexpr (std::integral<I>) {
192+ return static_cast <size_t >(r);
193+ } else {
194+ return static_cast <size_t >(r.id ());
195+ }
196+ };
197+ for (size_t t = 0 ; t < poolsize; ++t) {
198+ const auto & candidate = pool[t];
199+ auto cid = candidate.id ();
200+ if (cid == current_node_id || candidate.distance () == anchor_dist) {
201+ continue ;
202+ }
203+ bool in_result = false ;
204+ for (const auto & r : result) {
205+ if (result_id (r) == static_cast <size_t >(cid)) {
206+ in_result = true ;
207+ break ;
208+ }
209+ }
210+ assert (
211+ !in_result &&
212+ " Candidate with non-anchor distance should not already be in result"
213+ );
214+ if (in_result) {
215+ continue ;
216+ }
217+ result.back () = detail::construct_as (lib::Type<I>(), candidate);
218+ break ;
219+ }
220+ }
174221}
175222
176223template <
@@ -203,6 +250,9 @@ void heuristic_prune_neighbors(
203250 std::vector<float > pruned (poolsize, type_traits::tombstone_v<float , decltype (cmp)>);
204251
205252 float current_alpha = 1 .0f ;
253+ float anchor_dist = 0 .0f ;
254+ bool anchor_set = false ;
255+ bool all_duplicates = true ;
206256 while (result.size () < max_result_size && !cmp (alpha, current_alpha)) {
207257 size_t start = 0 ;
208258 while (result.size () < max_result_size && start < poolsize) {
@@ -218,6 +268,16 @@ void heuristic_prune_neighbors(
218268 const auto & query = accessor (dataset, id);
219269 distance::maybe_fix_argument (distance_function, query);
220270 result.push_back (detail::construct_as (lib::Type<I>(), pool[start]));
271+
272+ if (all_duplicates) {
273+ if (!anchor_set) {
274+ anchor_dist = pool[start].distance ();
275+ anchor_set = true ;
276+ } else if (pool[start].distance () != anchor_dist) {
277+ all_duplicates = false ;
278+ }
279+ }
280+
221281 for (size_t t = start + 1 ; t < poolsize; ++t) {
222282 if (cmp (current_alpha, pruned[t])) {
223283 continue ;
@@ -236,6 +296,40 @@ void heuristic_prune_neighbors(
236296 }
237297 current_alpha *= alpha;
238298 }
299+
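300+ // Duplicate cluster (every selected neighbor has the anchor distance): overwrite the last selected neighbor with the first pool candidate at a different distance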
301+ if (all_duplicates && anchor_set && !result.empty ()) {
302+ auto result_id = [](const I& r) -> size_t {
303+ if constexpr (std::integral<I>) {
304+ return static_cast <size_t >(r);
305+ } else {
306+ return static_cast <size_t >(r.id ());
307+ }
308+ };
309+ for (size_t t = 0 ; t < poolsize; ++t) {
310+ const auto & candidate = pool[t];
311+ auto cid = candidate.id ();
312+ if (cid == current_node_id || candidate.distance () == anchor_dist) {
313+ continue ;
314+ }
315+ bool in_result = false ;
316+ for (const auto & r : result) {
317+ if (result_id (r) == static_cast <size_t >(cid)) {
318+ in_result = true ;
319+ break ;
320+ }
321+ }
322+ assert (
323+ !in_result &&
324+ " Candidate with non-anchor distance should not already be in result"
325+ );
326+ if (in_result) {
327+ continue ;
328+ }
329+ result.back () = detail::construct_as (lib::Type<I>(), candidate);
330+ break ;
331+ }
332+ }
239333}
240334
241335// /
0 commit comments