forked from BIDData/BIDMach
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadcriteo.ssc
executable file
·62 lines (59 loc) · 1.95 KB
/
readcriteo.ssc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/**
 * Read a tab-separated text file into dense matrices, one column per input line.
 *
 * The file is read twice: a first pass counts lines (stopping at end of file or at
 * the first empty line) so the output matrices can be allocated, and a second pass
 * parses the fields.
 *
 * Each line holds an optional leading label followed by up to 39 feature fields.
 * The first 13 feature fields are parsed as decimal integers; the remaining 26 are
 * parsed as hexadecimal and narrowed to Int (values above the Int range keep their
 * low 32 bits). Empty or missing fields leave a 0 in both output matrices.
 *
 * @param fname    path handed to HMat.getInputStream (its second argument is passed
 *                 as 0 here -- NOTE(review): presumably "no compression override"; confirm)
 * @param haslabel > 0 if the first field of every line is an integer label, otherwise 0;
 *                 it is also used as the column offset of the first feature field
 * @return (labels, values, present) where labels is a 1 x nlines FMat, or null when
 *         haslabel <= 0; values is a 39 x nlines IMat of parsed fields; present is a
 *         39 x nlines IMat holding 1 wherever the corresponding field was non-empty
 */
def readcritfile(fname:String, haslabel:Int):(FMat, IMat, IMat) = {
  import java.io._
  import java.lang.{Long => JLong}

  val nfeats = 39;    // feature fields per line
  val nnumeric = 13;  // leading feature fields that are decimal rather than hex

  def open():BufferedReader =
    new BufferedReader(new InputStreamReader(HMat.getInputStream(fname, 0)));

  // Pass 1: count the lines so the matrices can be sized up front.
  var nlines = 0;
  val fin = open();
  try {
    var line = fin.readLine();
    while (line != null && line.length > 0) {
      nlines += 1;
      line = fin.readLine();
    }
  } finally {
    fin.close();  // released even if reading throws
  }

  val labels = if (haslabel > 0) zeros(1, nlines) else null;
  val numpart = izeros(nfeats, nlines);
  val bpart = izeros(nfeats, nlines);

  // Pass 2: parse every counted line into column iline of the outputs.
  val din = open();
  try {
    var line = din.readLine();
    var iline = 0;
    // The null guard covers a file that yields fewer lines than pass 1 counted.
    while (iline < nlines && line != null) {
      val parts = line.split("\t");
      val nparts = parts.length
      if (haslabel > 0) labels(iline) = parts(0).toInt;
      var i = 0;
      // split drops trailing empty fields, so stop at whichever bound comes first.
      while (i < nfeats && i+haslabel < nparts) {
        val field = parts(i+haslabel);
        if (field.length > 0) {
          if (i < nnumeric) {
            numpart(i, iline) = field.toInt;
          } else {
            // Hex fields may exceed Int range: parse as Long, then narrow.
            numpart(i, iline) = JLong.parseLong(field, 16).toInt;
          }
          bpart(i, iline) = 1;
        }
        i += 1;
      }
      line = din.readLine();
      iline += 1;
    }
  } finally {
    din.close();  // released even if a field fails to parse
  }
  (labels, numpart, bpart);
}
// Directory holding the split input files; converted matrices are written alongside them.
val dir = "../data/criteo/parts/"

// Convert training parts 000..091: each yields a label, a data and a non-zero-mask file.
println("reading and converting training data")
(0 until 92).foreach { part =>
  val (labels, data, mask) = readcritfile(dir + "train%03d".format(part), 1);
  saveFMat(dir + "trainlabel%03d.fmat.lz4".format(part), labels);
  saveIMat(dir + "traindata%03d.imat.lz4".format(part), data);
  saveIMat(dir + "trainnz%03d.imat.lz4".format(part), mask);
  print(".")
}
// Convert test parts 000..012 into a data file and a non-zero-mask file each.
println("\nreading and converting test data")
for (i <- 0 to 12) {
  // The test files are read with haslabel = 0, so readcritfile returns null for the
  // label matrix. No label file is written: the previous code passed the feature
  // matrix to saveFMat under the "testlabel" name, which is not label data.
  val (_, b, c) = readcritfile(dir+("test%03d" format i), 0);
  saveIMat(dir+("testdata%03d.imat.lz4" format i), b);
  saveIMat(dir+("testnz%03d.imat.lz4" format i), c);
  print(".")
}