-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSorter.java
More file actions
163 lines (148 loc) · 5.57 KB
/
Sorter.java
File metadata and controls
163 lines (148 loc) · 5.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
package models;
import java.io.File;
import java.io.FileWriter;
import java.sql.*;
import java.util.ArrayList;
import java.util.HashMap;
import au.com.bytecode.opencsv.CSVWriter;
import configuration.Consts;
import configuration.Filter;
/**
 * Sorts a delimited input file by its ID fields with the help of SQLite.
 * <p>
 * All rows are loaded into a table named {@code sortdb}, an index over the ID
 * fields is created, and the table is written back ordered by those fields to
 * {@code <input path>.sorted.csv}. The database lives in memory when the
 * estimated free heap exceeds roughly 1.5 times the input file size, otherwise
 * in {@code <tmppath>/sorter.db}.
 * <p>
 * The database connection is opened in the constructor and closed by
 * {@code sortFileByID}, so each instance can perform a single sort.
 *
 * @author HellwigP
 */
public class Sorter extends InputFile {

	/** JDBC location that makes SQLite keep the database in memory. */
	private static final String IN_MEMORY_DB = ":memory:";
	/** Number of rows collected in one JDBC batch before it is executed and committed. */
	private static final int IMPORT_BATCH_SIZE = 50000;

	/** Connection to the SQLite database holding the rows while they are sorted. */
	private Connection sqldb;
	/** Suffix appended to the input path to form the name of the sorted output file. */
	private String sortedfileext = ".sorted.csv";
	/** Database location: {@code ":memory:"} or the path of the temporary database file. */
	private String dbfile;
	/** Field separator used for the sorted output file. */
	private char separator = Consts.fieldcombineseparator.charAt(0);
	/** Quote character used for the sorted output file. */
	private char quote = CSVWriter.NO_QUOTE_CHARACTER;

	/**
	 * Instantiates a new sorter and opens the SQLite database used for sorting.
	 *
	 * @param datentyp forwarded to the {@code InputFile} constructor
	 * @param path path of the input file; its size decides between an in-memory and an on-disk database
	 * @param filetype forwarded to the {@code InputFile} constructor
	 * @param idfields forwarded to the {@code InputFile} constructor
	 * @param separator field separator of the input, also used for the sorted output
	 * @param quote quote character of the input, also used for the sorted output;
	 *        {@code Character.MIN_VALUE} means the output is written without quoting
	 * @param tmppath directory in which {@code sorter.db} is created when the data does not fit into memory
	 * @param filters forwarded to the {@code InputFile} constructor
	 * @throws Exception if the input cannot be opened, a stale database file cannot be removed,
	 *         or the SQLite driver/connection is not available
	 */
	public Sorter(String datentyp, String path, String filetype, String[] idfields, char separator, char quote, String tmppath, ArrayList<Filter> filters) throws Exception {
		// The meaning of the literal 'false' is defined by InputFile, which is not part of this file.
		super(datentyp, path, filetype, idfields, separator, quote, false, filters);
		this.separator = separator;
		this.quote = (quote == Character.MIN_VALUE) ? CSVWriter.NO_QUOTE_CHARACTER : quote;

		// Use an in-memory database if possible: free heap must exceed file size + ~50%.
		Runtime runtime = Runtime.getRuntime();
		long allocatedMemory = runtime.totalMemory() - runtime.freeMemory();
		long presumableFreeMemory = runtime.maxMemory() - allocatedMemory;
		long filelength = new File(path).length();
		if (presumableFreeMemory > (filelength + filelength / 2)) {
			dbfile = IN_MEMORY_DB;
		} else {
			dbfile = tmppath + "/sorter.db";
			File stale = new File(dbfile);
			// A left-over database would already contain 'sortdb' and make the
			// later 'create table' fail, so report the real cause right here.
			if (stale.exists() && !stale.delete()) {
				throw new java.io.IOException("Could not delete stale sort database " + dbfile);
			}
		}

		Class.forName("org.sqlite.JDBC");
		sqldb = DriverManager.getConnection("jdbc:sqlite:" + dbfile);
	}

	/**
	 * Reads the input into the database, optionally adds translated columns, and
	 * writes the rows ordered by the ID fields to a new CSV file with a header line.
	 * <p>
	 * The database connection is closed and an on-disk database file is removed
	 * when this method ends, whether it succeeds or fails.
	 *
	 * @param addColumn whether the columns named in {@code targetcols} are appended to every row
	 * @param sourcecol name of the input column whose value is looked up in {@code translator};
	 *        only used when columns are added
	 * @param targetcols names of the appended columns; {@code null} or empty appends nothing
	 * @param translator maps a value of {@code sourcecol} to the values of the appended columns;
	 *        missing keys, short arrays and {@code null} entries yield empty strings
	 * @param makeInt2Cols whether the first two input columns are stored as INTEGER
	 *        (their values must then be parseable by {@link Integer#parseInt(String)})
	 * @return path of the sorted file (input path + {@code ".sorted.csv"})
	 * @throws Exception if the input has no columns, or on any read, SQL or write failure
	 */
	public String sortFileByID(boolean addColumn, String sourcecol, String targetcols[], HashMap<String,String[]> translator, boolean makeInt2Cols) throws Exception {
		String sortedPath = this.getPath() + sortedfileext;
		try {
			String[] headerline = getColnames();
			if (headerline == null || headerline.length == 0) {
				throw new IllegalStateException("Cannot sort " + this.getPath() + ": input has no columns");
			}
			String[] extraCols = (addColumn && targetcols != null) ? targetcols : new String[0];

			// 1. create the table from the header line (plus appended columns)
			createSortTable(headerline, extraCols, makeInt2Cols);

			// 2. read and import the csv data
			try {
				importRowsIntoSortTable(headerline, extraCols, sourcecol, translator, makeInt2Cols);
			} finally {
				this.close();
			}

			// an index over the ID fields helps sorting
			createSortIndex();

			// 3. dump the table sorted
			writeSortedCsv(sortedPath);
		} finally {
			releaseDatabase();
		}
		return sortedPath;
	}

	/**
	 * Sorts without adding columns and without storing any column as INTEGER.
	 *
	 * @return path of the sorted file
	 * @throws Exception on any read, SQL or write failure
	 */
	public String sortFileByID() throws Exception {
		return this.sortFileByID(false, null, null, null, false);
	}

	/**
	 * Sorts without adding columns.
	 *
	 * @param makeInt2Cols whether the first two input columns are stored as INTEGER
	 * @return path of the sorted file
	 * @throws Exception on any read, SQL or write failure
	 */
	public String sortFileByID(boolean makeInt2Cols) throws Exception {
		return this.sortFileByID(false, null, null, null, makeInt2Cols);
	}

	/**
	 * Sorts, optionally adding translated columns, with all columns stored as TEXT.
	 *
	 * @param addColumn whether the columns named in {@code targetcols} are appended
	 * @param sourcecol name of the input column whose value is looked up in {@code translator}
	 * @param targetcols names of the appended columns
	 * @param translator maps a value of {@code sourcecol} to the values of the appended columns
	 * @return path of the sorted file
	 * @throws Exception on any read, SQL or write failure
	 */
	public String sortFileByID(boolean addColumn, String sourcecol, String[] targetcols, HashMap<String,String[]> translator) throws Exception {
		return this.sortFileByID(addColumn, sourcecol, targetcols, translator, false);
	}

	/** Wraps a column name in single quotes, doubling embedded quotes so it cannot break the statement. */
	private static String quoteSqlName(String name) {
		return "'" + String.valueOf(name).replace("'", "''") + "'";
	}

	/** Creates table {@code sortdb} with one column per header field followed by the appended columns. */
	private void createSortTable(String[] headerline, String[] extraCols, boolean makeInt2Cols) throws SQLException {
		StringBuilder createSQL = new StringBuilder("create table sortdb (");
		for (int i = 0; i < headerline.length; i++) {
			if (i > 0) {
				createSQL.append(", ");
			}
			// Same rule as used when binding values, so that the declared type
			// and the bound type agree for every column (including the last one).
			createSQL.append(quoteSqlName(headerline[i]))
					.append((makeInt2Cols && i < 2) ? " INTEGER" : " TEXT");
		}
		for (int i = 0; i < extraCols.length; i++) {
			createSQL.append(", ").append(quoteSqlName(extraCols[i])).append(" TEXT");
		}
		createSQL.append(");");

		Statement stmt = sqldb.createStatement();
		try {
			stmt.executeUpdate(createSQL.toString());
		} finally {
			stmt.close();
		}
	}

	/** Builds the positional insert statement for the given number of columns (at least one). */
	private static String buildInsertSql(int columns) {
		StringBuilder insertSQL = new StringBuilder("insert into sortdb values(");
		for (int i = 0; i < columns; i++) {
			if (i > 0) {
				insertSQL.append(',');
			}
			insertSQL.append('?');
		}
		return insertSQL.append(");").toString();
	}

	/** Inserts every input row into {@code sortdb}, committing in batches of {@code IMPORT_BATCH_SIZE}. */
	private void importRowsIntoSortTable(String[] headerline, String[] extraCols, String sourcecol,
			HashMap<String,String[]> translator, boolean makeInt2Cols) throws Exception {
		PreparedStatement prep = sqldb.prepareStatement(buildInsertSql(headerline.length + extraCols.length));
		try {
			sqldb.setAutoCommit(false);
			// The outer nextRow() advances to the first row of each batch; inside a
			// batch the cursor is advanced after every row except the last one, so
			// that no row is skipped when the outer loop advances again.
			while (this.nextRow()) {
				for (int i = 1; i <= IMPORT_BATCH_SIZE; i++) {
					if (!this.hasRow()) {
						break;
					}
					bindCurrentRow(prep, headerline, extraCols, sourcecol, translator, makeInt2Cols);
					prep.addBatch();
					if (i < IMPORT_BATCH_SIZE) {
						this.nextRow();
					}
				}
				prep.executeBatch();
				sqldb.commit();
			}
		} finally {
			prep.close();
		}
	}

	/** Binds the values of the current input row, followed by the translated values of the appended columns. */
	private void bindCurrentRow(PreparedStatement prep, String[] headerline, String[] extraCols, String sourcecol,
			HashMap<String,String[]> translator, boolean makeInt2Cols) throws Exception {
		int colcount = headerline.length;
		boolean lookup = extraCols.length > 0 && translator != null;
		String[] targetcolval = null;
		for (int j = 0; j < colcount; j++) {
			String value = this.getValue(headerline[j]);
			// j+1 as JDBC parameters start with 1
			if (makeInt2Cols && j < 2) {
				prep.setInt(j + 1, Integer.parseInt(value));
			} else {
				prep.setString(j + 1, value);
			}
			if (lookup && headerline[j].equals(sourcecol)) {
				targetcolval = translator.get(value);
			}
		}
		for (int j = 0; j < extraCols.length; j++) {
			boolean translated = targetcolval != null && targetcolval.length > j && targetcolval[j] != null;
			prep.setString(colcount + j + 1, translated ? targetcolval[j] : "");
		}
	}

	/** Creates index {@code sort_id} on the ID fields. */
	private void createSortIndex() throws Exception {
		Statement stmt = sqldb.createStatement();
		try {
			// NOTE(review): getIDFields() is defined in InputFile; presumably it yields
			// a comma-separated column list usable in SQL - confirm there.
			stmt.executeUpdate("create index sort_id on sortdb (" + this.getIDFields() + ");");
		} finally {
			stmt.close();
		}
	}

	/** Writes all rows of {@code sortdb}, ordered by the ID fields and preceded by a header line, to the given path. */
	private void writeSortedCsv(String sortedPath) throws Exception {
		// NOTE(review): FileWriter uses the platform default charset - confirm this matches the input encoding.
		CSVWriter outputfile = new CSVWriter(new FileWriter(sortedPath), this.separator, this.quote);
		try {
			Statement stmt = sqldb.createStatement();
			try {
				ResultSet orderedTable = stmt.executeQuery("SELECT * FROM sortdb order by " + this.getIDFields() + ";");
				try {
					outputfile.writeAll(orderedTable, true);
				} finally {
					orderedTable.close();
				}
			} finally {
				stmt.close();
			}
		} finally {
			outputfile.close();
		}
	}

	/** Closes the database connection and removes the on-disk database file, if one was used. */
	private void releaseDatabase() throws SQLException {
		try {
			sqldb.close();
		} finally {
			if (!IN_MEMORY_DB.equals(dbfile)) {
				// Best effort: the file is only a temporary work area.
				new File(dbfile).delete();
			}
		}
	}
}