3737import org .apache .hadoop .hive .ql .metadata .Hive ;
3838import org .apache .hadoop .hive .ql .metadata .HiveException ;
3939import org .apache .hadoop .hive .ql .metadata .HiveUtils ;
40+ import org .apache .hadoop .hive .ql .metadata .RowLineageUtils ;
4041import org .apache .hadoop .hive .ql .metadata .VirtualColumn ;
4142import org .apache .hadoop .hive .ql .parse .TransformSpec ;
4243import org .apache .hadoop .hive .ql .session .SessionState ;
@@ -69,20 +70,23 @@ public boolean run(CompactorContext context) throws IOException, HiveException,
6970
7071 HiveConf conf = new HiveConf (context .getConf ());
7172 CompactionInfo ci = context .getCompactionInfo ();
73+
7274 String compactionQuery = buildCompactionQuery (context , compactTableName , conf );
7375
7476 SessionState sessionState = setupQueryCompactionSession (conf , ci , tblProperties );
77+
7578 String compactionTarget = "table " + HiveUtils .unparseIdentifier (compactTableName ) +
7679 (ci .partName != null ? ", partition " + HiveUtils .unparseIdentifier (ci .partName ) : "" );
7780
7881 try {
79- DriverUtils .runOnDriver (conf , sessionState , compactionQuery );
82+ DriverUtils .runOnDriver (sessionState . getConf () , sessionState , compactionQuery );
8083 LOG .info ("Completed compaction for {}" , compactionTarget );
8184 return true ;
8285 } catch (HiveException e ) {
8386 LOG .error ("Failed compacting {}" , compactionTarget , e );
8487 throw e ;
8588 } finally {
89+ RowLineageUtils .disableRowLineage (sessionState );
8690 sessionState .setCompaction (false );
8791 }
8892 }
@@ -94,59 +98,113 @@ private String buildCompactionQuery(CompactorContext context, String compactTabl
9498 context .getTable ().getTableName ());
9599 Table icebergTable = IcebergTableUtil .getTable (conf , table .getTTable ());
96100 String orderBy = ci .orderByClause == null ? "" : ci .orderByClause ;
97- String fileSizePredicate = null ;
98- String compactionQuery ;
99-
100- if (ci .type == CompactionType .MINOR ) {
101- long fileSizeInBytesThreshold = CompactionEvaluator .getFragmentSizeBytes (table .getParameters ());
102- fileSizePredicate = String .format ("%1$s in (select file_path from %2$s.files where file_size_in_bytes < %3$d)" ,
103- VirtualColumn .FILE_PATH .getName (), compactTableName , fileSizeInBytesThreshold );
104- conf .setLong (CompactorContext .COMPACTION_FILE_SIZE_THRESHOLD , fileSizeInBytesThreshold );
105- // IOW query containing a join with Iceberg .files metadata table fails with exception that Iceberg AVRO format
106- // doesn't support vectorization, hence disabling it in this case.
107- conf .setBoolVar (ConfVars .HIVE_VECTORIZATION_ENABLED , false );
108- }
109-
110- if (ci .partName == null ) {
111- if (!icebergTable .spec ().isPartitioned ()) {
112- HiveConf .setVar (conf , ConfVars .REWRITE_POLICY , RewritePolicy .FULL_TABLE .name ());
113- compactionQuery = String .format ("insert overwrite table %s select * from %<s %2$s %3$s" , compactTableName ,
114- fileSizePredicate == null ? "" : "where " + fileSizePredicate , orderBy );
115- } else if (icebergTable .specs ().size () > 1 ) {
116- // Compacting partitions of old partition specs on a partitioned table with partition evolution
117- HiveConf .setVar (conf , ConfVars .REWRITE_POLICY , RewritePolicy .PARTITION .name ());
118- // A single filter on a virtual column causes errors during compilation,
119- // added another filter on file_path as a workaround.
120- compactionQuery = String .format ("insert overwrite table %1$s select * from %1$s " +
121- "where %2$s != %3$d and %4$s is not null %5$s %6$s" ,
122- compactTableName , VirtualColumn .PARTITION_SPEC_ID .getName (), icebergTable .spec ().specId (),
123- VirtualColumn .FILE_PATH .getName (), fileSizePredicate == null ? "" : "and " + fileSizePredicate , orderBy );
101+ String fileSizePredicate = buildMinorFileSizePredicate (ci , compactTableName , conf , table );
102+
103+ String columnsList = "*" ;
104+ if (RowLineageUtils .supportsRowLineage (table )) {
105+ RowLineageUtils .enableRowLineage (conf );
106+ LOG .debug ("Row lineage flag set for compaction of table {}" , compactTableName );
107+ if (ci .isMajorCompaction () && ci .partName == null ) {
108+ columnsList = buildSelectColumnList (icebergTable , conf ) + RowLineageUtils .getRowLineageColumnsForCompaction ();
124109 } else {
125- // Partitioned table without partition evolution with partition spec as null in the compaction request - this
126- // code branch is not supposed to be reachable
127- throw new HiveException (ErrorMsg .COMPACTION_NO_PARTITION );
110+ columnsList = columnsList + RowLineageUtils .getRowLineageColumnsForCompaction ();
128111 }
129- } else {
130- HiveConf .setBoolVar (conf , ConfVars .HIVE_CONVERT_JOIN , false );
131- conf .setBoolVar (ConfVars .HIVE_VECTORIZATION_ENABLED , false );
112+ }
113+
114+ String compactionQuery = (ci .partName == null ) ?
115+ buildFullTableCompactionQuery (compactTableName , conf , icebergTable ,
116+ columnsList , fileSizePredicate , orderBy ) :
117+ buildPartitionCompactionQuery (ci , compactTableName , conf , icebergTable ,
118+ columnsList , fileSizePredicate , orderBy );
119+
120+ LOG .info ("Compaction query: {}" , compactionQuery );
121+ return compactionQuery ;
122+ }
123+
124+ private static String buildMinorFileSizePredicate (
125+ CompactionInfo ci , String compactTableName , HiveConf conf , org .apache .hadoop .hive .ql .metadata .Table table ) {
126+ if (ci .type != CompactionType .MINOR ) {
127+ return null ;
128+ }
129+
130+ long fileSizeInBytesThreshold = CompactionEvaluator .getFragmentSizeBytes (table .getParameters ());
131+ conf .setLong (CompactorContext .COMPACTION_FILE_SIZE_THRESHOLD , fileSizeInBytesThreshold );
132+ // IOW query containing a join with Iceberg .files metadata table fails with exception that Iceberg AVRO format
133+ // doesn't support vectorization, hence disabling it in this case.
134+ conf .setBoolVar (ConfVars .HIVE_VECTORIZATION_ENABLED , false );
135+
136+ return String .format ("%1$s in (select file_path from %2$s.files where file_size_in_bytes < %3$d)" ,
137+ VirtualColumn .FILE_PATH .getName (), compactTableName , fileSizeInBytesThreshold );
138+ }
139+
140+ private String buildFullTableCompactionQuery (
141+ String compactTableName ,
142+ HiveConf conf ,
143+ Table icebergTable ,
144+ String columnsList ,
145+ String fileSizePredicate ,
146+ String orderBy ) throws HiveException {
147+
148+ if (!icebergTable .spec ().isPartitioned ()) {
149+ HiveConf .setVar (conf , ConfVars .REWRITE_POLICY , RewritePolicy .FULL_TABLE .name ());
150+ return String .format ("insert overwrite table %1$s select %2$s from %1$s %3$s %4$s" ,
151+ compactTableName , columnsList ,
152+ fileSizePredicate == null ? "" : "where " + fileSizePredicate , orderBy );
153+ }
154+
155+ if (icebergTable .specs ().size () > 1 ) {
156+ // Compacting partitions of old partition specs on a partitioned table with partition evolution
132157 HiveConf .setVar (conf , ConfVars .REWRITE_POLICY , RewritePolicy .PARTITION .name ());
133- conf .set (IcebergCompactionService .PARTITION_PATH , new Path (ci .partName ).toString ());
134-
135- PartitionSpec spec ;
136- String partitionPredicate ;
137- try {
138- spec = IcebergTableUtil .getPartitionSpec (icebergTable , ci .partName );
139- partitionPredicate = buildPartitionPredicate (ci , spec );
140- } catch (MetaException e ) {
141- throw new HiveException (e );
142- }
158+ // A single filter on a virtual column causes errors during compilation,
159+ // added another filter on file_path as a workaround.
160+ return String .format ("insert overwrite table %1$s select %2$s from %1$s " +
161+ "where %3$s != %4$d and %5$s is not null %6$s %7$s" ,
162+ compactTableName , columnsList ,
163+ VirtualColumn .PARTITION_SPEC_ID .getName (), icebergTable .spec ().specId (),
164+ VirtualColumn .FILE_PATH .getName (), fileSizePredicate == null ? "" : "and " + fileSizePredicate , orderBy );
165+ }
143166
144- compactionQuery = String .format ("INSERT OVERWRITE TABLE %1$s SELECT * FROM %1$s WHERE %2$s IN " +
145- "(SELECT FILE_PATH FROM %1$s.FILES WHERE %3$s AND SPEC_ID = %4$d) %5$s %6$s" ,
146- compactTableName , VirtualColumn .FILE_PATH .getName (), partitionPredicate , spec .specId (),
147- fileSizePredicate == null ? "" : "AND " + fileSizePredicate , orderBy );
167+ // Partitioned table without partition evolution with partition spec as null in the compaction request - this
168+ // code branch is not supposed to be reachable
169+ throw new HiveException (ErrorMsg .COMPACTION_NO_PARTITION );
170+ }
171+
172+ private String buildPartitionCompactionQuery (
173+ CompactionInfo ci ,
174+ String compactTableName ,
175+ HiveConf conf ,
176+ Table icebergTable ,
177+ String columnsList ,
178+ String fileSizePredicate ,
179+ String orderBy ) throws HiveException {
180+ HiveConf .setBoolVar (conf , ConfVars .HIVE_CONVERT_JOIN , false );
181+ conf .setBoolVar (ConfVars .HIVE_VECTORIZATION_ENABLED , false );
182+ HiveConf .setVar (conf , ConfVars .REWRITE_POLICY , RewritePolicy .PARTITION .name ());
183+ conf .set (IcebergCompactionService .PARTITION_PATH , new Path (ci .partName ).toString ());
184+
185+ PartitionSpec spec ;
186+ String partitionPredicate ;
187+ try {
188+ spec = IcebergTableUtil .getPartitionSpec (icebergTable , ci .partName );
189+ partitionPredicate = buildPartitionPredicate (ci , spec );
190+ } catch (MetaException e ) {
191+ throw new HiveException (e );
148192 }
149- return compactionQuery ;
193+
194+ return String .format ("INSERT OVERWRITE TABLE %1$s SELECT %2$s FROM %1$s WHERE %3$s IN " +
195+ "(SELECT FILE_PATH FROM %1$s.FILES WHERE %4$s AND SPEC_ID = %5$d) %6$s %7$s" ,
196+ compactTableName , columnsList , VirtualColumn .FILE_PATH .getName (), partitionPredicate , spec .specId (),
197+ fileSizePredicate == null ? "" : "AND " + fileSizePredicate , orderBy );
198+ }
199+
200+ /**
201+ * Builds a comma-separated SELECT list from the Iceberg table schema.
202+ */
203+ private static String buildSelectColumnList (Table icebergTable , HiveConf conf ) {
204+ return icebergTable .schema ().columns ().stream ()
205+ .map (Types .NestedField ::name )
206+ .map (col -> HiveUtils .unparseIdentifier (col , conf ))
207+ .collect (Collectors .joining (", " ));
150208 }
151209
152210 private String buildPartitionPredicate (CompactionInfo ci , PartitionSpec spec ) throws MetaException {
0 commit comments