Skip to content

Commit b8b6613

Browse files
committed
Merge pull request #49 from Georgetown-University-Libraries/multiparse
Multiparse
2 parents fdbeb2b + c5527bc commit b8b6613

1 file changed

Lines changed: 69 additions & 28 deletions

File tree

core/src/main/gov/nara/nwts/ftapp/importer/MultiParser.java

Lines changed: 69 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import gov.nara.nwts.ftapp.FTDriver;
55
import gov.nara.nwts.ftapp.Timer;
66
import gov.nara.nwts.ftapp.ftprop.FTPropFile;
7+
import gov.nara.nwts.ftapp.ftprop.InitializationStatus;
78
import gov.nara.nwts.ftapp.stats.Stats;
89
import gov.nara.nwts.ftapp.stats.StatsItem;
910
import gov.nara.nwts.ftapp.stats.StatsItemConfig;
@@ -18,14 +19,15 @@
1819
import java.util.Vector;
1920
import java.util.regex.Matcher;
2021
import java.util.regex.Pattern;
22+
import java.util.regex.PatternSyntaxException;
2123

2224
/**
2325
* Base class parser to analyze and ingest individual rows from a file using a regular expression pattern.
2426
* @author TBrady
2527
*
2628
*/
2729
public class MultiParser extends DefaultImporter {
28-
public static enum status {PASS,FAIL}
30+
public static enum status {PASS,WARN,ERROR,SKIP,FAIL}
2931
public static enum ParserStatsItems implements StatsItemEnum {
3032
Row(StatsItem.makeStringStatsItem("Row",60)),
3133
PassFail(StatsItem.makeEnumStatsItem(status.class, "Pass/Fail").setInitVal(status.PASS)),
@@ -42,23 +44,49 @@ public StatsItemConfig getDetails() {
4244
}
4345

4446
public static final String F_PARSE = "Parser Rule File";
45-
private FTPropFile pParseRule;
47+
private ParserFile pParseRule;
4648

4749
public MultiParser(FTDriver dt) {
4850
super(dt);
49-
pParseRule = new FTPropFile(dt, this.getClass().getName(), F_PARSE, F_PARSE,
50-
"Parser Rule File", "");
51+
pParseRule = new ParserFile(dt);
5152
this.ftprops.add(pParseRule);
5253
}
5354

54-
enum ParserFileMode {NA,COLS,PATTS;}
55+
enum ParserFileMode {NA,COLS,FILTERS,PATTS;}
56+
57+
class ParserPattern {
58+
status stat = status.FAIL;
59+
Pattern p;
60+
61+
ParserPattern(String category, String pattern) throws PatternSyntaxException {
62+
this.stat = status.valueOf(category);
63+
this.p = Pattern.compile(pattern);
64+
}
65+
}
5566

56-
class ParserFile {
67+
class ParserFile extends FTPropFile {
5768
String[] groups = new String[0];
58-
Vector<Pattern> patterns = new Vector<Pattern>();
69+
String[] fgroups = new String[0];
70+
Vector<ParserPattern> patterns = new Vector<ParserPattern>();
71+
Pattern pCat;
5972

60-
ParserFile(File f) {
61-
try (BufferedReader br = new BufferedReader(new FileReader(f))) {
73+
ParserFile(FTDriver dt) {
74+
super(dt, MultiParser.this.getClass().getName(), F_PARSE, F_PARSE,
75+
"Parser Rule File", "");
76+
String s = "^\\[CATEGORY:\\s*(";
77+
StringBuilder sb = new StringBuilder(s);
78+
for(status stat: status.values()) {
79+
if (sb.length() > s.length()) sb.append("|");
80+
sb.append(stat.toString());
81+
}
82+
sb.append(")\\]$");
83+
pCat = Pattern.compile(sb.toString());
84+
}
85+
86+
public InitializationStatus initValidation(File refFile){
87+
InitializationStatus iStat = new InitializationStatus();
88+
String category = "";
89+
try (BufferedReader br = new BufferedReader(new FileReader(this.getFile()))) {
6290
ParserFileMode pfm = ParserFileMode.NA;
6391
for(String line=br.readLine(); line!=null; line=br.readLine()) {
6492
line = line.trim();
@@ -69,41 +97,49 @@ class ParserFile {
6997
pfm = ParserFileMode.COLS;
7098
continue;
7199
}
100+
if (line.equals("[FILTERS]")) {
101+
pfm = ParserFileMode.FILTERS;
102+
continue;
103+
}
72104
if (line.equals("[PATTERNS]")) {
73105
pfm = ParserFileMode.PATTS;
74106
continue;
75107
}
108+
109+
Matcher m = pCat.matcher(line);
110+
if (m.matches()) {
111+
category = m.group(1);
112+
continue;
113+
}
76114

77115
if (pfm == ParserFileMode.COLS) {
78116
groups = line.split(",");
117+
} else if (pfm == ParserFileMode.FILTERS) {
118+
fgroups = line.split(",");
79119
} else if (pfm == ParserFileMode.PATTS) {
80-
try {
81-
patterns.add(Pattern.compile(line));
82-
} catch (Exception e) {
83-
e.printStackTrace();
84-
}
120+
patterns.add(new ParserPattern(category, line));
85121
}
86122
}
87-
} catch (FileNotFoundException e) {
88-
e.printStackTrace();
89-
} catch (IOException e) {
90-
e.printStackTrace();
123+
} catch (PatternSyntaxException|IOException e) {
124+
iStat.addMessage(e);
91125
}
92-
126+
return iStat;
93127
}
94128
}
95129

96130

97131
public ActionResult importFile(File selectedFile) throws IOException {
98-
File parse = pParseRule.getFile();
99-
ParserFile pf = new ParserFile(parse);
100132
details = StatsItemConfig.create(ParserStatsItems.class);
101-
for(String grp: pf.groups) {
133+
for(String grp: pParseRule.groups) {
102134
details.addStatsItem(grp, StatsItem.makeStringStatsItem(grp));
103135
}
104-
for(Pattern patt: pf.patterns) {
105-
System.out.println(patt.toString());
136+
for(String grp: pParseRule.fgroups) {
137+
StatsItem si = details.getByKey(grp);
138+
if (si != null) {
139+
si.setWidth(150).makeFilter(true);
140+
}
106141
}
142+
107143
Timer timer = new Timer();
108144
TreeMap<String,Stats> types = new TreeMap<String,Stats>();
109145
try {
@@ -114,11 +150,11 @@ public ActionResult importFile(File selectedFile) throws IOException {
114150
Stats stats = Stats.Generator.INSTANCE.create(details, key);
115151
stats.setVal(ParserStatsItems.PassFail, status.FAIL);
116152
types.put(key, stats);
117-
for(Pattern p: pf.patterns) {
118-
Matcher m = p.matcher(line);
153+
for(ParserPattern patt: pParseRule.patterns) {
154+
Matcher m = patt.p.matcher(line);
119155
if (m.matches()) {
120-
stats.setVal(ParserStatsItems.PassFail, status.PASS);
121-
for(String s: pf.groups) {
156+
stats.setVal(ParserStatsItems.PassFail, patt.stat);
157+
for(String s: pParseRule.groups) {
122158
try {
123159
StatsItem si = details.getByKey(s);
124160
stats.setKeyVal(si, m.group(s));
@@ -131,6 +167,7 @@ public ActionResult importFile(File selectedFile) throws IOException {
131167
stats.setVal(ParserStatsItems.Data, line);
132168
}
133169
br.close();
170+
details.createFilters(types);
134171
return new ActionResult(selectedFile, selectedFile.getName(), this.toString(), details, types, true, timer.getDuration());
135172
} catch (FileNotFoundException e) {
136173
e.printStackTrace();
@@ -148,11 +185,15 @@ public String getDescription() {
148185
"This rule takes advantage of named groups in a regular expression match.\n\n" +
149186
"[COLS]\n" +
150187
"FIRST,LAST,ID,COST\n\n" +
188+
"[FILTERS]\n" +
189+
"LAST,COST\n\n" +
151190
"[PATTERNS]\n" +
152191
"# Sample Comment 1\n" +
192+
"[CATEGORY: SKIP]\n" +
153193
"^(?<FIRST>[^\\t\\-]+)-(?<ID>[^\\t]+)\\t(?<LAST>[^\\t]+).*\\$(?<COST>\\d+).*$\n" +
154194
"^(?<FIRST>[^\\t\\-]+)-(?<ID>[^\\t]+)\\t(?<LAST>[^\\t]+).*$\n" +
155195
"# Sample Comment 2\n" +
196+
"[CATEGORY: WARN]\n" +
156197
"^(?<FIRST>[^\\t\\-]+)\\t(?<LAST>[^\\t]+).*\\$(?<COST>\\d+).*$\n" +
157198
"^(?<FIRST>[^\\t\\-]+)\\t(?<LAST>[^\\t]+).*$\n" +
158199
"^$";

0 commit comments

Comments
 (0)