-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfreeLunchNN.Rmd
107 lines (83 loc) · 3.02 KB
/
freeLunchNN.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
---
title: "Stor 565 Final Project"
output: html_notebook
---
```{r}
# Load the raw food dataset from food_data.csv (path is relative to the
# working directory). readr is attached, but the base reader read.csv() is
# the function actually called.
library(readr)
fullData <- read.csv(file = "food_data.csv")
```
```{r}
library(neuralnet)
```
```{r}
# Exploratory only: uncomment the line below to plot the response against every other column when looking for candidate predictors.
#plot(PCT_FREE_LUNCH14~., data = fullData)
```
```{r}
# Columns used for modelling: the response PCT_FREE_LUNCH14 (listed first)
# followed by the candidate predictors.
colNames <- c(
  "PCT_FREE_LUNCH14",
  "PCT_LACCESS_POP15",
  "PCT_LACCESS_SNAP15",
  "PCT_LACCESS_CHILD15",
  "PCT_LACCESS_WHITE15",
  "PCT_LACCESS_BLACK15",
  "PCT_LACCESS_HISP15",
  "PCT_LACCESS_NHASIAN15",
  "PCT_LACCESS_NHNA15",
  "PCT_LACCESS_NHPI15",
  "PCT_LACCESS_MULTIR15",
  "PCT_SNAP16",
  "PC_SNAPBEN15",
  "PC_WIC_REDEMP12",
  "PCT_OBESE_ADULTS13",
  "FOODINSEC_13_15",
  "FOODINSEC_CHILD_03_11",
  "MILK_PRICE10",
  "PCT_NHWHITE10",
  "PCT_NHBLACK10",
  "PCT_HISP10",
  "PCT_NHASIAN10",
  "PCT_NHNA10",
  "PCT_NHPI10",
  "MEDHHINC15",
  "CHILDPOVRATE15",
  "SNAPS16"
)

# Keep only the rows that have a value in every modelling column.
hasAllValues <- complete.cases(fullData[, colNames])
cleanData <- fullData[hasAllValues, ]

# Restrict to the modelling columns, preserving the column order of the raw
# data rather than the order of colNames.
keepColumn <- is.element(names(cleanData), colNames)
nnData <- cleanData[, keepColumn, drop = FALSE]
```
```{r}
library(caTools)

# Seed the RNG *before* the random split so the train/test partition, and every
# result derived from it, is identical on each run. 15 matches the seed this
# notebook uses for the cross-validation folds.
set.seed(15)

# Logical mask over the rows of nnData: TRUE rows (about half) form the
# training set, FALSE rows the test set.
indexes <- sample.split(seq_len(nrow(nnData)), SplitRatio = 0.5)

# Per-column extremes, computed column by column so the data frame is never
# coerced to a matrix (which apply() would do).
maxs <- vapply(nnData, max, numeric(1))
mins <- vapply(nnData, min, numeric(1))
colRanges <- maxs - mins

# A constant column has zero range and would scale to NaN; fail loudly instead
# of passing NaN into the network.
if (any(maxs <= mins)) {
  stop(
    "Cannot min-max scale constant column(s): ",
    paste(names(colRanges)[maxs <= mins], collapse = ", "),
    call. = FALSE
  )
}

# Min-max scale every column to [0, 1] using the extremes of the full
# (train + test) data; later chunks invert this with the same extremes.
scaled <- as.data.frame(scale(nnData, center = mins, scale = colRanges))
train <- scaled[indexes, ]
test <- scaled[!indexes, ]

# Unscaled copies of the same train/test rows.
trainUS <- nnData[indexes, ]
testUS <- nnData[!indexes, ]
```
```{r}
library(caret)
set.seed(15)

# Grid search over the sizes of the two hidden layers, scored by k-fold
# cross-validation on the training set.
nFolds <- 5
hidden1Sizes <- 2:15 # candidate neuron counts for hidden layer 1
hidden2Sizes <- 2:9 # candidate neuron counts for hidden layer 2

# Partition the training rows into folds; each element holds the row indices
# that are held out for that fold.
folds <- createFolds(
  train$PCT_FREE_LUNCH14,
  k = nFolds,
  list = TRUE,
  returnTrain = FALSE
)

# errorStore[fold, h1, h2] is the held-out MSE (in original response units) for
# a network with h1 and h2 hidden neurons. Cells that are never fitted (a layer
# size of 1) stay NA instead of 0, so they cannot be mistaken for a perfect
# score when the table is inspected or averaged.
errorStore <- array(
  NA_real_,
  dim = c(nFolds, max(hidden1Sizes), max(hidden2Sizes))
)

# Constants that map the min-max scaled response back to its original units;
# they do not depend on the fold or the architecture, so compute them once.
responseMin <- min(nnData$PCT_FREE_LUNCH14)
responseRange <- max(nnData$PCT_FREE_LUNCH14) - responseMin

for (i in seq_along(folds)) { # Over which fold is held out
  fitRows <- train[-folds[[i]], ]
  heldOut <- train[folds[[i]], ]
  # True response of the held-out rows, back on the original scale.
  testReal <- heldOut$PCT_FREE_LUNCH14 * responseRange + responseMin

  for (j in hidden1Sizes) { # Over number of neurons in hidden layer 1
    for (k in hidden2Sizes) { # Over number of neurons in hidden layer 2
      valNet <- neuralnet(
        PCT_FREE_LUNCH14 ~ .,
        data = fitRows,
        hidden = c(j, k),
        linear.output = TRUE
      )
      pred <- compute(valNet, heldOut)
      # Undo the scaling so the error is in original response units.
      pred_ <- pred$net.result * responseRange + responseMin
      MSE.nn <- mean((testReal - pred_)^2)
      errorStore[i, j, k] <- MSE.nn
    }
  }
}
errorStore
```
```{r}
# Cross-validated error per architecture: average errorStore over its first
# (fold) dimension, leaving one mean MSE for each
# (hidden-layer-1 size, hidden-layer-2 size) cell.
cvMSE <- apply(errorStore, MARGIN = c(2, 3), FUN = mean)
cvMSE
```
```{r}
# Fit the final network on the full training set: two hidden layers with 3 and
# 2 neurons, and linear.output = TRUE as in the cross-validation fits.
finalNN <- neuralnet(
  formula = PCT_FREE_LUNCH14 ~ .,
  data = train,
  hidden = c(3, 2),
  linear.output = TRUE
)
```
```{r}
# Draw the fitted final network.
plot(finalNN)
```
```{r}
# Run the final network on the held-out test rows.
predVals <- compute(finalNN, test)

# Undo the min-max scaling so predictions and observed values are both in the
# original units of PCT_FREE_LUNCH14.
lunchMin <- min(nnData$PCT_FREE_LUNCH14)
lunchSpan <- max(nnData$PCT_FREE_LUNCH14) - lunchMin
predVals_ <- predVals$net.result * lunchSpan + lunchMin
testR <- test$PCT_FREE_LUNCH14 * lunchSpan + lunchMin

# Test-set mean squared error, followed by its square root (RMSE).
squaredErrors <- (testR - predVals_)^2
MSE.nn <- sum(squaredErrors) / nrow(test)
MSE.nn
MSE.nn^0.5
```
```{r}
# Predicted vs. observed free-lunch percentage on the test set, both in
# original (unscaled) units.
plot(predVals_ ~ testUS$PCT_FREE_LUNCH14)
# Identity reference line y = x (intercept 0, slope 1): points on the line are
# perfect predictions. An intercept of 1 would shift the line up by one unit
# and make the model look biased low.
abline(a = 0, b = 1, col = "red")
```
```{r}
# Test-set residuals in original units: observed minus predicted.
errors <- testR - predVals_

# Median absolute error.
absErrors <- abs(errors)
median(absErrors)

# Distribution of the residuals.
hist(errors)
```