Skip to content

Commit a000b74

Browse files
committed
Merge pull request #50 from Georgetown-University-Libraries/multiparse
multiparse labels
2 parents b8b6613 + 96c1f23 commit a000b74

1 file changed

Lines changed: 26 additions & 12 deletions

File tree

core/src/main/gov/nara/nwts/ftapp/importer/MultiParser.java

Lines changed: 26 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import gov.nara.nwts.ftapp.ActionResult;
44
import gov.nara.nwts.ftapp.FTDriver;
55
import gov.nara.nwts.ftapp.Timer;
6+
import gov.nara.nwts.ftapp.YN;
7+
import gov.nara.nwts.ftapp.ftprop.FTPropEnum;
68
import gov.nara.nwts.ftapp.ftprop.FTPropFile;
79
import gov.nara.nwts.ftapp.ftprop.InitializationStatus;
810
import gov.nara.nwts.ftapp.stats.Stats;
@@ -31,6 +33,7 @@ public static enum status {PASS,WARN,ERROR,SKIP,FAIL}
3133
public static enum ParserStatsItems implements StatsItemEnum {
3234
Row(StatsItem.makeStringStatsItem("Row",60)),
3335
PassFail(StatsItem.makeEnumStatsItem(status.class, "Pass/Fail").setInitVal(status.PASS)),
36+
Label(StatsItem.makeStringStatsItem("Label").makeFilter(true)),
3437
Data(StatsItem.makeStringStatsItem("Data",300));
3538

3639
StatsItem si;
@@ -44,23 +47,32 @@ public StatsItemConfig getDetails() {
4447
}
4548

4649
public static final String F_PARSE = "Parser Rule File";
50+
public static final String F_CASE = "Case Insensitive";
4751
private ParserFile pParseRule;
4852

4953
public MultiParser(FTDriver dt) {
5054
super(dt);
5155
pParseRule = new ParserFile(dt);
5256
this.ftprops.add(pParseRule);
57+
this.ftprops.add(new FTPropEnum(dt, this.getClass().getName(), F_CASE, F_CASE,
58+
"Treat patterns as case insensitive", YN.values(), YN.N));
5359
}
5460

5561
enum ParserFileMode {NA,COLS,FILTERS,PATTS;}
62+
int getPatternFlags() {
63+
return getProperty(F_CASE) == YN.Y ? Pattern.CASE_INSENSITIVE : 0;
64+
65+
}
5666

5767
class ParserPattern {
5868
status stat = status.FAIL;
5969
Pattern p;
70+
String label;
6071

61-
ParserPattern(String category, String pattern) throws PatternSyntaxException {
72+
ParserPattern(String category, String pattern, String label) throws PatternSyntaxException {
6273
this.stat = status.valueOf(category);
63-
this.p = Pattern.compile(pattern);
74+
this.p = Pattern.compile(pattern, MultiParser.this.getPatternFlags());
75+
this.label = label;
6476
}
6577
}
6678

@@ -79,13 +91,16 @@ class ParserFile extends FTPropFile {
7991
if (sb.length() > s.length()) sb.append("|");
8092
sb.append(stat.toString());
8193
}
82-
sb.append(")\\]$");
94+
sb.append("):?(.*)\\]$");
8395
pCat = Pattern.compile(sb.toString());
8496
}
8597

8698
public InitializationStatus initValidation(File refFile){
99+
patterns.clear();
87100
InitializationStatus iStat = new InitializationStatus();
88101
String category = "";
102+
String label = "";
103+
89104
try (BufferedReader br = new BufferedReader(new FileReader(this.getFile()))) {
90105
ParserFileMode pfm = ParserFileMode.NA;
91106
for(String line=br.readLine(); line!=null; line=br.readLine()) {
@@ -109,6 +124,7 @@ public InitializationStatus initValidation(File refFile){
109124
Matcher m = pCat.matcher(line);
110125
if (m.matches()) {
111126
category = m.group(1);
127+
label = m.group(2) == null ? "" : m.group(2).trim();
112128
continue;
113129
}
114130

@@ -117,7 +133,7 @@ public InitializationStatus initValidation(File refFile){
117133
} else if (pfm == ParserFileMode.FILTERS) {
118134
fgroups = line.split(",");
119135
} else if (pfm == ParserFileMode.PATTS) {
120-
patterns.add(new ParserPattern(category, line));
136+
patterns.add(new ParserPattern(category, line, label));
121137
}
122138
}
123139
} catch (PatternSyntaxException|IOException e) {
@@ -154,6 +170,7 @@ public ActionResult importFile(File selectedFile) throws IOException {
154170
Matcher m = patt.p.matcher(line);
155171
if (m.matches()) {
156172
stats.setVal(ParserStatsItems.PassFail, patt.stat);
173+
stats.setVal(ParserStatsItems.Label, patt.label);
157174
for(String s: pParseRule.groups) {
158175
try {
159176
StatsItem si = details.getByKey(s);
@@ -182,21 +199,18 @@ public String toString() {
182199
}
183200
public String getDescription() {
184201
return "This rule will parse each line of a file and add it to the results table.\n" +
185-
"This rule takes advantage of named groups in a regular expression match.\n\n" +
202+
"This rule takes advantage of named groups in a regular expression match.\n" +
186203
"[COLS]\n" +
187-
"FIRST,LAST,ID,COST\n\n" +
204+
"FIRST,LAST,ID,COST\n" +
188205
"[FILTERS]\n" +
189-
"LAST,COST\n\n" +
206+
"LAST,COST\n" +
190207
"[PATTERNS]\n" +
191208
"# Sample Comment 1\n" +
192-
"[CATEGORY: SKIP]\n" +
209+
"[CATEGORY: SKIP: label]\n" +
193210
"^(?<FIRST>[^\\t\\-]+)-(?<ID>[^\\t]+)\\t(?<LAST>[^\\t]+).*\\$(?<COST>\\d+).*$\n" +
194211
"^(?<FIRST>[^\\t\\-]+)-(?<ID>[^\\t]+)\\t(?<LAST>[^\\t]+).*$\n" +
195212
"# Sample Comment 2\n" +
196-
"[CATEGORY: WARN]\n" +
197-
"^(?<FIRST>[^\\t\\-]+)\\t(?<LAST>[^\\t]+).*\\$(?<COST>\\d+).*$\n" +
198-
"^(?<FIRST>[^\\t\\-]+)\\t(?<LAST>[^\\t]+).*$\n" +
199-
"^$";
213+
"[CATEGORY: WARN: label]";
200214
}
201215
public String getShortName() {
202216
return "MultiParse";

0 commit comments

Comments
 (0)