1515
1616import java .io .File ;
1717import java .io .IOException ;
18+ import java .util .StringTokenizer ;
1819import java .util .TreeMap ;
1920import java .util .Vector ;
2021
@@ -41,6 +42,7 @@ public static enum Generator implements StatsGenerator {
4142 public static final String DELIM = "Delimiter" ;
4243 public static final String HEADROW = "HeadRow" ;
4344 public static final String COL = "COL" ;
45+ public static final String DEDUP = "DeDup" ;
4446 public CountKey (FTDriver dt ) {
4547 super (dt );
4648 this .ftprops .add (new FTPropEnum (dt , this .getClass ().getName (), DELIM , "delim" ,
@@ -49,6 +51,8 @@ public CountKey(FTDriver dt) {
4951 "Treat first row as header" , YN .values (), YN .Y ));
5052 this .ftprops .add (new FTPropString (dt , this .getClass ().getName (), COL , COL ,
5153 "Key Column starting at 1" , "1" ));
54+ this .ftprops .add (new FTPropEnum (dt , this .getClass ().getName (), DEDUP , DEDUP ,
55+ "Create De-duplicated file set" , YN .values (), YN .N ));
5256 }
5357
5458 public ActionResult importFile (File selectedFile ) throws IOException {
@@ -67,27 +71,75 @@ public ActionResult importFile(File selectedFile) throws IOException {
6771
6872 DelimitedFileReader dfr = new DelimitedFileReader (selectedFile , fileSeparator .separator );
6973 boolean firstRow = (YN )getProperty (HEADROW ) == YN .Y ;
74+ boolean dedup = (YN )getProperty (DEDUP ) == YN .Y ;
75+ DelimitedFileWriter bwDedup = null ;
76+ DelimitedFileWriter bwDup = null ;
77+ if (dedup ) {
78+ bwDedup = new DelimitedFileWriter (getNewFile (selectedFile , "dedup" ), fileSeparator .separator );
79+ bwDup = new DelimitedFileWriter (getNewFile (selectedFile , "dup-drop" ), fileSeparator .separator );
80+ }
7081
7182 firstRow = (YN )getProperty (HEADROW ) == YN .Y ;
7283 dfr = new DelimitedFileReader (selectedFile , fileSeparator .separator );
7384 for (Vector <String > cols = dfr .getRow (); cols != null ; cols = dfr .getRow ()){
7485 if (firstRow ) {
7586 firstRow = false ;
87+ if (dedup ) {
88+ bwDedup .writeRow (cols );
89+ bwDup .writeRow (cols );
90+ }
7691 continue ;
7792 }
7893 String key = cols .get (col < cols .size () ? col : 0 );
7994 Stats stats = types .get (key );
80- if (stats == null ) {
95+ if (stats == null ) {
8196 stats = Generator .INSTANCE .create (key );
8297 stats .setVal (CountStatsItems .Count , 1 );
8398 stats .setVal (CountStatsItems .Stat , MULT .ONE );
8499 types .put (key , stats );
100+ if (dedup ) {
101+ bwDedup .writeRow (cols );
102+ }
85103 } else {
86104 stats .sumVal (CountStatsItems .Count , 1 );
87105 stats .setVal (CountStatsItems .Stat , MULT .MANY );
106+ if (dedup ) {
107+ bwDup .writeRow (cols );
108+ }
88109 }
89110 }
90-
111+
112+ if (dedup ) {
113+ bwDedup .close ();
114+ bwDup .close ();
115+
116+ DelimitedFileWriter bwNoDup = new DelimitedFileWriter (getNewFile (selectedFile , "no-dup" ), fileSeparator .separator );
117+ DelimitedFileWriter bwAllDup = new DelimitedFileWriter (getNewFile (selectedFile , "all-dup" ), fileSeparator .separator );
118+ dfr = new DelimitedFileReader (selectedFile , fileSeparator .separator );
119+ firstRow = (YN )getProperty (HEADROW ) == YN .Y ;
120+ for (Vector <String > cols = dfr .getRow (); cols != null ; cols = dfr .getRow ()){
121+ if (firstRow ) {
122+ firstRow = false ;
123+ if (dedup ) {
124+ bwNoDup .writeRow (cols );
125+ bwAllDup .writeRow (cols );
126+ }
127+ continue ;
128+ }
129+ String key = cols .get (col < cols .size () ? col : 0 );
130+ Stats stats = types .get (key );
131+ if (stats == null ) {
132+ } else if (stats .getVal (CountStatsItems .Stat ) == MULT .ONE ) {
133+ bwNoDup .writeRow (cols );
134+ } else {
135+ bwAllDup .writeRow (cols );
136+ }
137+ }
138+ bwNoDup .close ();
139+ bwAllDup .close ();
140+ }
141+
142+
91143 return new ActionResult (selectedFile , selectedFile .getName (), this .toString (), details , types , true , timer .getDuration ());
92144 }
93145
@@ -100,4 +152,22 @@ public String getDescription() {
100152 public String getShortName () {
101153 return "Key" ;
102154 }
155+ public String getExt (File f ) {
156+ String ext = "" ;
157+ StringTokenizer st = new StringTokenizer (f .getName (), "." );
158+ while (st .hasMoreElements ()) {
159+ ext = st .nextElement ().toString ();
160+ }
161+ return ext ;
162+ }
163+
164+ public String getNewName (File f , String suff ) {
165+ String ext = getExt (f );
166+ String bname = f .getName ();
167+ return (ext .isEmpty ()) ? bname + "." + suff : bname .replaceFirst (ext + "$" , suff + "." + ext );
168+ }
169+ public File getNewFile (File f , String suff ) {
170+ return new File (f .getParentFile (), getNewName (f , suff ));
171+ }
172+
103173}
0 commit comments