-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrossval.R
81 lines (62 loc) · 2.17 KB
/
crossval.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Load packages
library(rminer)
library(ggplot2)

# Load the bank marketing data set (semicolon-separated, with a header row).
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# strings to factors by default. The models further down are fitted with
# task = "prob" on the target `y`, which presumably has to be a factor
# (NOTE(review): confirm against the rminer `fit` documentation).
bank <- read.csv(
  "bank-additional-full.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = TRUE
)

# Fix the random seed
set.seed(1)

# Create an artificial time axis. The data source notes that the rows are
# sorted chronologically, so the row number identifies the chronology.
time_axis <- as.numeric(seq_len(nrow(bank)))
bank_time <- cbind(bank, time_axis)

# Modeling techniques to compare; see the rminer documentation for details.
models <- c("ksvm", "ctree", "mlp", "lr")

# Result accumulators: one element is appended per (model, group count) run.
# Each vector's mode matches the values stored in it.
C0_t <- vector(mode = "numeric", length = 0)    # number of CV groups
C1_t <- vector(mode = "numeric", length = 0)    # AUC
C2_t <- vector(mode = "numeric", length = 0)    # ALIFT
C3_t <- vector(mode = "numeric", length = 0)    # ACC
C4_t <- vector(mode = "character", length = 0)  # model name
C5_t <- vector(mode = "character", length = 0)  # clustering label
#----------------Modeling with Cross Validation--------------------#
# For every model type and every number of groups from 2 to 20, run a
# cross validation on the full data set, report AUC, ALIFT and ACC on the
# console, and append the results to the C*_t accumulator vectors.
# The whole run is timed with system.time().
t <- system.time(
  for (i in models) {
    # n is the number of cross-validation groups; the minimum is 2 and it
    # should not be more than 30.
    for (n in 2:20) {
      # Cross-validate model `i` with `n` groups, requesting probabilities
      M <- crossvaldata(
        y ~ ., bank_time, fit, predict,
        ngroup = n, model = i, task = "prob"
      )
      cat("---Cross validation model", i, "with", n, "groups---", "\n")

      # Evaluate the cross-validated predictions against the true labels
      C1 <- mmetric(bank_time$y, M$cv.fit, metric = "AUC")
      C2 <- mmetric(bank_time$y, M$cv.fit, metric = "ALIFT")
      C3 <- mmetric(bank_time$y, M$cv.fit, metric = "ACC")
      # Group count and model label for this run (assigned directly rather
      # than through print(), which also wrote stray lines to the console)
      C0 <- n
      C4 <- i

      # Print findings
      cat("AUC of", i, ":", C1, "\n")
      cat("ALIFT of", i, ":", C2, "\n")
      cat("ACC of", i, ":", C3, "\n")

      # Stack values; the vectors grow by one element per run, which is
      # negligible here (4 models x 19 group counts).
      C0_t <- c(C0_t, C0)
      C1_t <- c(C1_t, C1)
      C2_t <- c(C2_t, C2)
      C3_t <- c(C3_t, C3)
      C4_t <- c(C4_t, C4)
      C5_t <- c(C5_t, "no_clustering")

      # Memory wipe for scalability
      gc()
    }
  }
)
cat("---time---")
print(t)
# Combine the accumulated results into one data frame, one row per run.
# The original call had "C3_t.C5_t" (a dot instead of a comma), which referred
# to a nonexistent object and stopped the script before anything was written.
# A data.frame (rather than cbind, which would coerce everything to a
# character matrix) keeps the metric columns numeric.
crossval_sum <- data.frame(
  as.integer(C0_t),  # group counts; as.integer also handles character input
  C4_t,
  C1_t,
  C2_t,
  C3_t,
  C5_t,
  row.names = NULL,
  stringsAsFactors = FALSE
)

# Label the columns
colnames(crossval_sum) <- c(
  "Groups", "Model", "AUC of ROC", "ALIFT", "ACC", "clustering"
)

# Show table (back check)
head(crossval_sum)

# Write the results as a semicolon-separated text file
write.table(crossval_sum, "crossvalidation_none.txt", sep = ";")

# Memory wipe
gc()