Skip to content

Commit b335b07

Browse files
HIVE-29551: Refactor ColumnStatsSemanticAnalyzer for performance optimization (#6443)
1 parent 1516fb9 commit b335b07

5 files changed

Lines changed: 156 additions & 131 deletions

File tree

ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2286,6 +2286,10 @@ public static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partC
22862286
return names;
22872287
}
22882288

2289+
public static List<String> getColumnTypesFromFieldSchema(List<FieldSchema> fieldSchemas) {
2290+
return fieldSchemas.stream().map(FieldSchema::getType).toList();
2291+
}
2292+
22892293
public static List<String> getInternalColumnNamesFromSignature(List<ColumnInfo> colInfos) {
22902294
List<String> names = new ArrayList<String>();
22912295
for (ColumnInfo ci : colInfos) {

ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.hadoop.hive.ql.parse;
2020

2121
import java.io.IOException;
22+
import java.io.Serializable;
2223
import java.io.UnsupportedEncodingException;
2324
import java.util.ArrayList;
2425
import java.util.Collection;
@@ -1413,11 +1414,54 @@ public String toString() {
14131414
}
14141415
}
14151416

1417+
/**
1418+
* Holds table column {@link FieldSchema} entries and lazily derived parallel name/type string
1419+
* lists for analyze / column-stats compilation.
1420+
*/
1421+
public static final class FieldSchemas implements Serializable {
1422+
1423+
private static final long serialVersionUID = 1L;
1424+
1425+
private final List<FieldSchema> schemas;
1426+
1427+
private transient List<String> colNames;
1428+
private transient List<String> colTypes;
1429+
1430+
public FieldSchemas(List<FieldSchema> schemas) {
1431+
this.schemas = schemas != null ? schemas : Collections.emptyList();
1432+
}
1433+
1434+
public List<FieldSchema> getSchemas() {
1435+
return schemas;
1436+
}
1437+
1438+
public int size() {
1439+
return schemas.size();
1440+
}
1441+
1442+
public FieldSchema get(int index) {
1443+
return schemas.get(index);
1444+
}
1445+
1446+
public List<String> getColName() {
1447+
if (colNames == null) {
1448+
colNames = Utilities.getColumnNamesFromFieldSchema(schemas);
1449+
}
1450+
return colNames;
1451+
}
1452+
1453+
public List<String> getColType() {
1454+
if (colTypes == null) {
1455+
colTypes = Utilities.getColumnTypesFromFieldSchema(schemas);
1456+
}
1457+
return colTypes;
1458+
}
1459+
}
1460+
14161461
public static class AnalyzeRewriteContext {
14171462

14181463
private String tableName;
1419-
private List<String> colName;
1420-
private List<String> colType;
1464+
private FieldSchemas fieldSchemas;
14211465
private boolean tblLvl;
14221466

14231467
public String getTableName() {
@@ -1428,12 +1472,12 @@ public void setTableName(String tableName) {
14281472
this.tableName = tableName;
14291473
}
14301474

1431-
public List<String> getColName() {
1432-
return colName;
1475+
public FieldSchemas getFieldSchemas() {
1476+
return fieldSchemas;
14331477
}
14341478

1435-
public void setColName(List<String> colName) {
1436-
this.colName = colName;
1479+
public void setFieldSchemas(FieldSchemas fieldSchemas) {
1480+
this.fieldSchemas = fieldSchemas;
14371481
}
14381482

14391483
public boolean isTblLvl() {
@@ -1444,14 +1488,6 @@ public void setTblLvl(boolean isTblLvl) {
14441488
this.tblLvl = isTblLvl;
14451489
}
14461490

1447-
public List<String> getColType() {
1448-
return colType;
1449-
}
1450-
1451-
public void setColType(List<String> colType) {
1452-
this.colType = colType;
1453-
}
1454-
14551491
}
14561492

14571493
/**

0 commit comments

Comments
 (0)