Skip to content

Commit 0507967

Browse files
committed
Create de-dup files based on key matches
1 parent 7697596 commit 0507967

1 file changed

Lines changed: 72 additions & 2 deletions

File tree

core/src/main/gov/nara/nwts/ftapp/importer/CountKey.java

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import java.io.File;
1717
import java.io.IOException;
18+
import java.util.StringTokenizer;
1819
import java.util.TreeMap;
1920
import java.util.Vector;
2021

@@ -41,6 +42,7 @@ public static enum Generator implements StatsGenerator {
4142
public static final String DELIM = "Delimiter";
4243
public static final String HEADROW = "HeadRow";
4344
public static final String COL = "COL";
45+
public static final String DEDUP = "DeDup";
4446
public CountKey(FTDriver dt) {
4547
super(dt);
4648
this.ftprops.add(new FTPropEnum(dt, this.getClass().getName(), DELIM, "delim",
@@ -49,6 +51,8 @@ public CountKey(FTDriver dt) {
4951
"Treat first row as header", YN.values(), YN.Y));
5052
this.ftprops.add(new FTPropString(dt, this.getClass().getName(), COL, COL,
5153
"Key Column starting at 1", "1"));
54+
this.ftprops.add(new FTPropEnum(dt, this.getClass().getName(), DEDUP, DEDUP,
55+
"Create De-duplicated file set", YN.values(), YN.N));
5256
}
5357

5458
public ActionResult importFile(File selectedFile) throws IOException {
@@ -67,27 +71,75 @@ public ActionResult importFile(File selectedFile) throws IOException {
6771

6872
DelimitedFileReader dfr = new DelimitedFileReader(selectedFile, fileSeparator.separator);
6973
boolean firstRow = (YN)getProperty(HEADROW) == YN.Y;
74+
boolean dedup = (YN)getProperty(DEDUP) == YN.Y;
75+
DelimitedFileWriter bwDedup = null;
76+
DelimitedFileWriter bwDup = null;
77+
if (dedup) {
78+
bwDedup = new DelimitedFileWriter(getNewFile(selectedFile, "dedup"), fileSeparator.separator);
79+
bwDup = new DelimitedFileWriter(getNewFile(selectedFile, "dup-drop"), fileSeparator.separator);
80+
}
7081

7182
firstRow = (YN)getProperty(HEADROW) == YN.Y;
7283
dfr = new DelimitedFileReader(selectedFile, fileSeparator.separator);
7384
for(Vector<String> cols = dfr.getRow(); cols != null; cols = dfr.getRow()){
7485
if (firstRow) {
7586
firstRow = false;
87+
if (dedup) {
88+
bwDedup.writeRow(cols);
89+
bwDup.writeRow(cols);
90+
}
7691
continue;
7792
}
7893
String key = cols.get(col < cols.size() ? col : 0);
7994
Stats stats = types.get(key);
80-
if (stats == null) {
95+
if (stats == null) {
8196
stats = Generator.INSTANCE.create(key);
8297
stats.setVal(CountStatsItems.Count, 1);
8398
stats.setVal(CountStatsItems.Stat, MULT.ONE);
8499
types.put(key, stats);
100+
if (dedup) {
101+
bwDedup.writeRow(cols);
102+
}
85103
} else {
86104
stats.sumVal(CountStatsItems.Count, 1);
87105
stats.setVal(CountStatsItems.Stat, MULT.MANY);
106+
if (dedup) {
107+
bwDup.writeRow(cols);
108+
}
88109
}
89110
}
90-
111+
112+
if (dedup) {
113+
bwDedup.close();
114+
bwDup.close();
115+
116+
DelimitedFileWriter bwNoDup = new DelimitedFileWriter(getNewFile(selectedFile, "no-dup"), fileSeparator.separator);
117+
DelimitedFileWriter bwAllDup = new DelimitedFileWriter(getNewFile(selectedFile, "all-dup"), fileSeparator.separator);
118+
dfr = new DelimitedFileReader(selectedFile, fileSeparator.separator);
119+
firstRow = (YN)getProperty(HEADROW) == YN.Y;
120+
for(Vector<String> cols = dfr.getRow(); cols != null; cols = dfr.getRow()){
121+
if (firstRow) {
122+
firstRow = false;
123+
if (dedup) {
124+
bwNoDup.writeRow(cols);
125+
bwAllDup.writeRow(cols);
126+
}
127+
continue;
128+
}
129+
String key = cols.get(col < cols.size() ? col : 0);
130+
Stats stats = types.get(key);
131+
if (stats == null) {
132+
} else if (stats.getVal(CountStatsItems.Stat) == MULT.ONE) {
133+
bwNoDup.writeRow(cols);
134+
} else {
135+
bwAllDup.writeRow(cols);
136+
}
137+
}
138+
bwNoDup.close();
139+
bwAllDup.close();
140+
}
141+
142+
91143
return new ActionResult(selectedFile, selectedFile.getName(), this.toString(), details, types, true, timer.getDuration());
92144
}
93145

@@ -100,4 +152,22 @@ public String getDescription() {
100152
public String getShortName() {
101153
return "Key";
102154
}
155+
public String getExt(File f) {
156+
String ext = "";
157+
StringTokenizer st = new StringTokenizer(f.getName(), ".");
158+
while(st.hasMoreElements()) {
159+
ext = st.nextElement().toString();
160+
}
161+
return ext;
162+
}
163+
164+
public String getNewName(File f, String suff) {
165+
String ext = getExt(f);
166+
String bname = f.getName();
167+
return (ext.isEmpty()) ? bname + "." + suff : bname.replaceFirst(ext + "$", suff + "." + ext);
168+
}
169+
public File getNewFile(File f, String suff) {
170+
return new File(f.getParentFile(), getNewName(f, suff));
171+
}
172+
103173
}

0 commit comments

Comments
 (0)