1616 */
1717package org .apache .solr .handler .component .combine ;
1818
19+ import java .io .IOException ;
1920import java .util .HashMap ;
21+ import java .util .HashSet ;
2022import java .util .List ;
2123import java .util .Map ;
24+ import java .util .Set ;
25+ import org .apache .lucene .index .LeafReaderContext ;
26+ import org .apache .lucene .internal .hppc .IntDoubleHashMap ;
27+ import org .apache .lucene .queries .function .FunctionScoreQuery ;
28+ import org .apache .lucene .search .DoubleValues ;
29+ import org .apache .lucene .search .DoubleValuesSource ;
2230import org .apache .lucene .search .Explanation ;
31+ import org .apache .lucene .search .IndexSearcher ;
32+ import org .apache .lucene .search .Query ;
2333import org .apache .lucene .search .TotalHits ;
2434import org .apache .solr .common .SolrException ;
2535import org .apache .solr .common .params .SolrParams ;
36+ import org .apache .solr .common .util .CollectionUtil ;
2637import org .apache .solr .common .util .SimpleOrderedMap ;
2738import org .apache .solr .handler .component .ShardDoc ;
2839import org .apache .solr .search .DocIterator ;
2940import org .apache .solr .search .DocSet ;
3041import org .apache .solr .search .DocSlice ;
42+ import org .apache .solr .search .QueryCommand ;
3143import org .apache .solr .search .QueryResult ;
44+ import org .apache .solr .search .SolrIndexSearcher ;
45+ import org .apache .solr .search .SortedIntDocSet ;
3246import org .apache .solr .util .plugin .NamedListInitializedPlugin ;
3347
3448/**
@@ -49,12 +63,19 @@ public abstract List<ShardDoc> combine(
4963 Map <String , List <ShardDoc >> queriesDocMap , SolrParams solrParams );
5064
5165 /**
52- * Simple combine query result list as a union.
66+ * Combine query result list as a union, optionally deduplicating by a collapse field. When a
67+ * collapse filter is provided, only one document per unique field value is kept (based on the
68+ * collapse sort/score selection). This ensures that collapse semantics are preserved across
69+ * combined queries.
5370 *
5471 * @param queryResults the query results to be combined
72+ * @param collapseFilters the collapse post filters, or empty if no collapse dedup is needed
73+ * @param searcher the searcher to read field values from, required when collapseFilters is
74+ * non-empty
5575 * @return the combined query result
5676 */
57- public static QueryResult simpleCombine (List <QueryResult > queryResults ) {
77+ public static QueryResult simpleCombine (
78+ List <QueryResult > queryResults , List <Query > collapseFilters , SolrIndexSearcher searcher ) {
5879 QueryResult combinedQueryResults = new QueryResult ();
5980 DocSet combinedDocSet = null ;
6081 Map <Integer , Float > uniqueDocIds = new HashMap <>();
@@ -71,6 +92,19 @@ public static QueryResult simpleCombine(List<QueryResult> queryResults) {
7192 combinedDocSet = combinedDocSet .union (queryResult .getDocSet ());
7293 }
7394 }
95+
96+ // If collapse fields are specified, deduplicate by field value across combined queries.
97+ // Each sub-query already collapsed individually, but different sub-queries may have
98+ // selected different group heads for the same field value.
99+ int removedByCollapse = 0 ;
100+ if (CollectionUtil .isNotEmpty (collapseFilters ) && searcher != null && queryResults .size () > 1 ) {
101+ int preCollapseSize = uniqueDocIds .size ();
102+ combinedDocSet =
103+ removeCollapsedDuplicatesViaSearcher (
104+ collapseFilters , searcher , uniqueDocIds , combinedDocSet );
105+ removedByCollapse = preCollapseSize - uniqueDocIds .size ();
106+ }
107+
74108 int combinedResultsLength = uniqueDocIds .size ();
75109 int [] combinedResultsDocIds = new int [combinedResultsLength ];
76110 float [] combinedResultScores = new float [combinedResultsLength ];
@@ -87,14 +121,64 @@ public static QueryResult simpleCombine(List<QueryResult> queryResults) {
87121 combinedResultsLength ,
88122 combinedResultsDocIds ,
89123 combinedResultScores ,
90- Math .max (combinedResultsLength , totalMatches ),
124+ Math .max (combinedResultsLength , totalMatches - removedByCollapse ),
91125 combinedResultScores .length > 0 ? combinedResultScores [0 ] : 0 ,
92126 TotalHits .Relation .GREATER_THAN_OR_EQUAL_TO );
93127 combinedQueryResults .setDocList (combinedResultSlice );
94128 combinedQueryResults .setDocSet (combinedDocSet );
95129 return combinedQueryResults ;
96130 }
97131
132+ /**
133+ * Removes collapsed duplicates across combined sub-queries. Ensures that only one document per
134+ * collapse field value retained across the merged results. Entries removed by collapsing are also
135+ * removed from {@code uniqueDocIds} (mutated in place).
136+ *
137+ * @return the collapsed combined DocSet, or null if combinedDocSet was null
138+ */
139+ private static DocSet removeCollapsedDuplicatesViaSearcher (
140+ List <Query > collapseFilters ,
141+ SolrIndexSearcher searcher ,
142+ Map <Integer , Float > uniqueDocIds ,
143+ DocSet combinedDocSet ) {
144+ IntDoubleHashMap scoreMap = new IntDoubleHashMap (uniqueDocIds .size ());
145+ uniqueDocIds .forEach ((doc , score ) -> scoreMap .put (doc , score .doubleValue ()));
146+ Query baseQuery ;
147+ boolean needDocSet ;
148+ if (combinedDocSet != null ) {
149+ baseQuery = combinedDocSet .makeQuery ();
150+ needDocSet = true ;
151+ } else {
152+ int [] queryDocIds =
153+ uniqueDocIds .keySet ().stream ().mapToInt (Integer ::intValue ).sorted ().toArray ();
154+ baseQuery = new SortedIntDocSet (queryDocIds ).makeQuery ();
155+ needDocSet = false ;
156+ }
157+ Query scoredQuery =
158+ FunctionScoreQuery .boostByValue (baseQuery , new PrecomputedScoreValuesSource (scoreMap ));
159+
160+ try {
161+ QueryCommand cmd =
162+ new QueryCommand ()
163+ .setQuery (scoredQuery )
164+ .setFilterList (collapseFilters )
165+ .setLen (uniqueDocIds .size ())
166+ .setNeedDocSet (needDocSet );
167+ QueryResult result = searcher .search (cmd );
168+
169+ Set <Integer > retainedDocIds = HashSet .newHashSet (result .getDocList ().size ());
170+ DocIterator iter = result .getDocList ().iterator ();
171+ while (iter .hasNext ()) {
172+ retainedDocIds .add (iter .nextDoc ());
173+ }
174+
175+ uniqueDocIds .keySet ().retainAll (retainedDocIds );
176+ return needDocSet ? result .getDocSet () : null ;
177+ } catch (IOException e ) {
178+ throw new SolrException (SolrException .ErrorCode .SERVER_ERROR , e );
179+ }
180+ }
181+
98182 /**
99183 * Retrieves a list of explanations for the given queries and results.
100184 *
@@ -127,4 +211,67 @@ public static QueryAndResponseCombiner getImplementation(
127211 throw new SolrException (
128212 SolrException .ErrorCode .BAD_REQUEST , "Unknown Combining algorithm: " + algorithm );
129213 }
214+
215+ /**
216+ * A {@link DoubleValuesSource} backed by a global doc ID to score map. Returns pre-computed
217+ * scores for specific document IDs.
218+ */
219+ private static class PrecomputedScoreValuesSource extends DoubleValuesSource {
220+
221+ private final IntDoubleHashMap scoreByDoc ;
222+
223+ PrecomputedScoreValuesSource (IntDoubleHashMap scoreByDoc ) {
224+ this .scoreByDoc = scoreByDoc ;
225+ }
226+
227+ @ Override
228+ public DoubleValues getValues (LeafReaderContext ctx , DoubleValues existing ) {
229+ int base = ctx .docBase ;
230+ return new DoubleValues () {
231+ private double currentScore ;
232+
233+ @ Override
234+ public double doubleValue () {
235+ return currentScore ;
236+ }
237+
238+ @ Override
239+ public boolean advanceExact (int doc ) {
240+ int globalDoc = base + doc ;
241+ currentScore = scoreByDoc .get (globalDoc );
242+ return true ;
243+ }
244+ };
245+ }
246+
247+ @ Override
248+ public boolean needsScores () {
249+ return false ;
250+ }
251+
252+ @ Override
253+ public DoubleValuesSource rewrite (IndexSearcher searcher ) {
254+ return this ;
255+ }
256+
257+ @ Override
258+ public boolean isCacheable (LeafReaderContext ctx ) {
259+ return false ;
260+ }
261+
262+ @ Override
263+ public boolean equals (Object o ) {
264+ return o instanceof PrecomputedScoreValuesSource other && scoreByDoc .equals (other .scoreByDoc );
265+ }
266+
267+ @ Override
268+ public int hashCode () {
269+ return scoreByDoc .hashCode ();
270+ }
271+
272+ @ Override
273+ public String toString () {
274+ return "PrecomputedScoreValuesSource(docs=" + scoreByDoc .size () + ")" ;
275+ }
276+ }
130277}
0 commit comments