-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCryptocurrencyPricingAnalysis.py
239 lines (225 loc) · 9.75 KB
/
CryptocurrencyPricingAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import csv
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random as rand
import time
import math
from datetime import datetime
from EvenlySpacedNumbers import OptimalSubsampleNew
def costFunction(W, X, y):
    """Return the mean squared error of the linear model ``X.dot(W)`` against ``y``.

    W -- weights multiplied on the right of X
    X -- feature matrix, one sample per row
    y -- targets aligned with the rows of X

    The squared residuals are averaged along axis 0.
    """
    residual = X.dot(W) - y
    return (residual * residual).mean(axis=0)
def gradientDescent(W, X, y, alpha, m):
    """Take one batch gradient-descent step and return the updated weights.

    W     -- current weights
    X     -- feature matrix, one sample per row
    y     -- targets aligned with the rows of X
    alpha -- learning rate
    m     -- divisor applied to the summed gradient (the caller passes the sample count)

    W itself is not modified; a new array is returned.
    """
    residual = X.dot(W) - y
    gradient = (1 / m) * X.T.dot(residual)
    return W - alpha * gradient
def mMS(x): #short for min-max scaling
    """Min-max scale ``x`` so that its smallest value maps to 0 and its largest to 1.

    x -- array offering ``.min()`` / ``.max()`` (a NumPy array in this script)

    Returns ``(x - min) / (max - min)``. When every element is equal the
    range is zero and that division would be 0/0 (NaN in every entry, which
    then poisons any fit built on the column); an all-zero float array of
    the same shape is returned instead.
    """
    xmin = x.min()
    span = x.max() - xmin
    if span == 0:
        return np.zeros_like(x, dtype=float)
    return (x - xmin) / span
def solveForMinW(X, y):
    """Return the least-squares weights W minimising ``||X.dot(W) - y||``.

    X -- feature matrix, one sample per row
    y -- targets aligned with the rows of X

    Solved with ``np.linalg.lstsq`` instead of forming ``inv(X.T X)``
    explicitly: the explicit inverse raises ``LinAlgError`` when ``X.T X``
    is singular and loses precision when the columns are nearly collinear.
    For a rank-deficient X the minimum-norm solution is returned.
    """
    return np.linalg.lstsq(X, y, rcond=None)[0]
def forwardProp(X1, W1, W2):
    """Push ``X1`` through two linear layers and return the final output.

    Computes ``W2.T.dot(W1.T.dot(X1))``; no activation function is applied.
    """
    hidden = W1.T.dot(X1)
    output = W2.T.dot(hidden)
    return output
def fp2(X1, W1_2, W2_3):
    """Row-major linear forward pass; returns ``(hidden, output)``.

    hidden = X1.dot(W1_2) and output = hidden.dot(W2_3). No activation
    function is applied to either layer.
    """
    hidden = X1.dot(W1_2)
    return hidden, hidden.dot(W2_3)
def RELU(x):
    """Rectify ``x`` in place: every negative entry becomes 0.

    The argument itself is modified and the same object is returned, so the
    caller's array is changed by this call.
    """
    negative = x < 0
    x[negative] = 0
    return x
def backProp(Xmat, Ymat, W1_2, W2_3, alpha):
    """Run one pass of per-sample weight updates over the training rows.

    Xmat  -- feature matrix, one sample per row (m x n1)
    Ymat  -- targets as a 2-D array; column 0 of row i is the target of sample i
    W1_2  -- input-to-hidden weights (n1 x n2)
    W2_3  -- hidden-to-output weights (n2 x 1)
    alpha -- learning rate

    Returns ``(W2_3, W1_2)`` -- note the order, hidden-to-output weights first.
    The weight arrays passed in are not modified; new arrays are returned.
    """
    # Visit every row starting at index 0; range(1, ...) would silently
    # leave the first training sample out of every pass.
    for row in range(Xmat.shape[0]):
        Xvec = Xmat[row, :].reshape((-1, 1))        # sample as an (n1 x 1) column
        hidden = RELU((W1_2.T).dot(Xvec))           # rectified hidden activations (n2 x 1)
        X3 = (W2_3.T).dot(hidden)                   # network output (1 x 1)
        delta = RELU(X3) - Ymat[row, 0]             # output error for this sample
        dCostdW2 = hidden * delta                   # Hadamard (element-wise) product
        dCostdW1 = (Xvec * delta).dot(W2_3.T)       # (n1 x 1).(1 x n2) -> (n1 x n2)
        W2_3 = W2_3 - alpha * dCostdW2
        W1_2 = W1_2 - alpha * dCostdW1
    return (W2_3, W1_2)
def neuralNetwork(X, y, N, alpha, epochs=50):
    """Train a small three-layer network on (X, y) and return its predictions for X.

    X      -- feature matrix, one sample per row
    y      -- targets; reshaped to a column vector internally
    N      -- number of hidden units
    alpha  -- learning rate handed to backProp
    epochs -- number of full passes over the data (default 50)

    Layer sizes: X.shape[1] inputs, N hidden units, 1 output. Both weight
    matrices start uniformly random in [-1, 1). Progress and the cost after
    each pass are printed.

    NOTE(review): backProp rectifies the hidden and output values with RELU,
    while the cost printed here and the returned prediction come from fp2,
    which is purely linear -- confirm which activation is intended.
    """
    W1_2 = 2 * np.random.random((X.shape[1], N)) - 1  # 2 * [0,1) - 1 ==> [-1,1)
    W2_3 = 2 * np.random.random((N, 1)) - 1
    y = y.reshape((-1, 1))  # row vector -> column vector
    for i in range(epochs):
        print("Iteration: " + str(i))
        W2_3, W1_2 = backProp(X, y, W1_2, W2_3, alpha)
        print("Cost: ")
        print((0.5 * (fp2(X, W1_2, W2_3)[1] - y) ** 2).mean())
    print("FINISHED")
    return (fp2(X, W1_2, W2_3)[1])
def createFeatures(X, weekday):
    """Build the regression design matrix from epochs and a weekday feature.

    X       -- 1-D array of epoch values (np.log is taken, so values should be positive)
    weekday -- 1-D array aligned with X holding the weekday-derived feature

    Returns a 2-D array with one row per sample and 14 columns:
    a constant 1, then the min-max scaled log(x), x, x^2 ... x^10,
    weekday and weekday^2.

    Each column is scaled with mMS using that column's own minimum and
    maximum, so the scaling depends on the data passed in.
    """
    # x, x^2, ..., x^10 by repeated multiplication.
    powers = [X]
    for _ in range(9):
        powers.append(X * powers[-1])

    columns = [[1] * X.shape[0], mMS(np.log(X))]
    columns.extend(mMS(power) for power in powers)
    columns.append(mMS(weekday))
    columns.append(mMS(weekday * weekday))
    return np.column_stack(columns)
#Creates a line plot of ETH avg. price over time
# Column names are supplied explicitly, so the first line of the file is read as data.
priceFields = ["Epoch", "Start Interval", "End Interval", "Low Price", "High Price", "Avg. Price", "Last Price"]
# newline="" lets the csv module do its own line-ending handling, as the csv documentation requires.
with open("EthPricingData.txt", newline="") as obj:
    priceDictReader = csv.DictReader(obj, fieldnames=priceFields)
    priceDict = {field: [] for field in priceFields}
    for row in priceDictReader:
        for field, value in row.items():
            # Non-empty values become floats, except the two interval
            # timestamps, which stay as strings; empty values are stored unchanged.
            if value and field != "Start Interval" and field != "End Interval":
                priceDict[field].append(float(value))
            else:
                priceDict[field].append(value)
# Command line: exactly one argument, the fraction of the training data to
# drop when building the reduced model. Values in [0, 0.95] are accepted.
# Every rejection prints a message and exits with status 1.
if len(sys.argv) != 2:
    print("Your first argument is a decimal representing the fraction of data to reduce in the reduced model. This value can be from 0.2 <= x <= 0.95.")
    sys.exit(1)
try:
    reducePercent = float(sys.argv[1])
except ValueError:
    print("Your first argument is a decimal representing the fraction of data to reduce in the reduced model. This value can be from 0.2 <= x <= 0.95.")
    sys.exit(1)
# Negated chained comparison so that NaN is rejected as well.
if not (0 <= reducePercent <= 0.95):
    print("This program works best when amount of data to reduce is >= 0.2 and <= 0.95 and only if there is one argument.")
    sys.exit(1)
# Column arrays taken from the parsed price file.
epoch = np.array(priceDict["Epoch"])
avgPrice = np.array(priceDict["Avg. Price"])

# Weekday number of each interval's start timestamp.
startInt = np.array(
    [datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ").weekday() for stamp in priceDict["Start Interval"]],
    dtype="int32",
)
# Squared distance in days from weekday 5.
# NOTE(review): datetime.weekday() numbers Monday as 0, so 5 is Saturday and
# 4 is Friday; the earlier comment named Friday -- confirm which day is meant.
startInt = np.square(startInt - 5)

# Keep only rows with a positive epoch (the 0 value is dropped to make
# normalizing epoch easier). The mask is computed once, before epoch changes.
positiveEpoch = epoch > 0
avgPrice = avgPrice[positiveEpoch]
startInt = startInt[positiveEpoch]
epoch = epoch[positiveEpoch]
plt.title("Avg. Price Over Epoch Number")
iterations = 0
# The hypothesis is linear in the columns produced by createFeatures:
# h(x) = sum_k W_k * feature_k(x).
# Training data: epochs 5000..17000 inclusive. Test data: epochs above 17000.
minEpoch = epoch.min()
minAvgPrice = avgPrice.min()
epochShift = minEpoch - 1          # shifts the smallest epoch in the file to 1
trainMask = (epoch >= 5000) & (epoch <= 17000)
testMask = epoch > 17000

# Boolean-mask indexing copies, so epoch and avgPrice keep their raw values.
xtrain = epoch[trainMask] - epochShift       # normalized epoch values
xtest = epoch[testMask] - epochShift
weekdayTrain = startInt[trainMask]
weekdayTest = startInt[testMask]
ytrain = avgPrice[trainMask] - minAvgPrice   # normalized avg price values
ytest = avgPrice[testMask] - minAvgPrice

plt.xlabel("Epochs - " + str(int(minEpoch - 1)))
plt.ylabel("Avg Price - " + str(round(minAvgPrice, 2)))
plt.plot(xtrain, ytrain, label = "Actual")
# Batch gradient descent on the training features.
# NOTE(review): weightArr, iterArr and costArr are not referenced again in the
# visible part of this file; the plotted fit comes from solveForMinW.
m = xtrain.shape[0]
onesList = [1] * m                 # m ones, one per training sample
oneArr = np.array(onesList)
xmat = createFeatures(xtrain, weekdayTrain)

# randint(0, 0) can only return 0, so every weight starts at zero.
weightlist = [rand.randint(0,0) for _column in range(xmat.shape[1])]
weightArr = np.array(weightlist)
alpha = 0.1

iterList = list(range(1, 10**5))   # list of all iterations
costList = []                      # list of cost at each iteration (before that step's update)
for step in iterList:
    costList.append(costFunction(weightArr, xmat, ytrain))
    weightArr = gradientDescent(weightArr, xmat, ytrain, alpha, m)
iterArr = np.array(iterList)
costArr = np.array(costList)
# Closed-form least-squares fit on the full training set.
Wmat = solveForMinW(xmat, ytrain)
pred = xmat.dot(Wmat)

# NOTE(review): createFeatures min-max scales each column with the minimum and
# maximum of the data it is given, so the test features are scaled by the test
# set's own range rather than the training range -- confirm this is intended.
fullTrainCost = costFunction(Wmat, createFeatures(xtrain, weekdayTrain), ytrain)
print("Full Data Training cost: ", fullTrainCost, sep="")
fullTestCost = costFunction(Wmat, createFeatures(xtest, weekdayTest), ytest)
print("Full Data Testing cost: ", fullTestCost, sep="")

# Neural-network alternative for the line of fit (it came out too straight):
#nnwpred = neuralNetwork(xmat, ytrain, xmat.shape[1] + 7, 0.0001)
# To view it, plot nnwpred instead of pred and uncomment the two lines after the plot call.
plt.plot(xtrain, pred, label = "Full Data")
#plt.show()
#exit()
# Split the fitted curve into maximal runs where the prediction keeps moving
# in one direction. Each finished entry is [first epoch, last epoch, "incr" | "decr"].
windowmatrix = [list(pair) for pair in zip(xtrain.tolist(), pred.tolist())] #[normalized epoch, normalized predicted value]

# Direction of the first run, taken from the first two predictions
# (equal values count as increasing).
trend = "decr" if windowmatrix[1][1] - windowmatrix[0][1] < 0 else "incr"
listOfIncrDecrIntervals = [[windowmatrix[0][0]]]

for previous, current in zip(windowmatrix, windowmatrix[1:]):
    if trend == "decr":
        turned = previous[1] < current[1]
    else:
        turned = previous[1] > current[1]
    if turned:
        # Close the running interval at the previous point and open a new
        # one at the current point, moving the opposite way.
        listOfIncrDecrIntervals[-1].extend([previous[0], trend])
        listOfIncrDecrIntervals.append([current[0]])
        trend = "incr" if trend == "decr" else "decr"

# Close the last interval at the final epoch.
listOfIncrDecrIntervals[-1].extend([windowmatrix[-1][0], trend])
#print(listOfIncrDecrIntervals)
# Mark the start of every monotone interval on the plot.
for i in listOfIncrDecrIntervals:
    plt.axvline(x=i[0])
# Reduced data set, accumulated interval by interval:
# normalized epochs, normalized prices and the weekday feature.
xlst = []
ylst = []
daylst = []
#left inclusive, right exclusive
for i in listOfIncrDecrIntervals:
    # i is [start, end, "incr" | "decr"] in normalized epochs.
    size = i[1] - i[0]  # not used below
    startEpoch = i[0] + (minEpoch - 1) #denormalize these values
    endEpoch = i[1] + (minEpoch - 1)
    #reducedSize = round(size * 0.8) #earlier fixed-ratio variant, kept for reference
    # Boolean-mask indexing copies, so the in-place subtraction below leaves avgPrice untouched.
    values = avgPrice[(epoch >= startEpoch) & (epoch < endEpoch)]
    values -= minAvgPrice
    valList = sorted(values.tolist())  # ascending prices of this interval
    # Number of points to keep after dropping the requested fraction.
    reducedSize = round(len(valList) * (1 - reducePercent))
    if i[1] > i[0]:
        # presumably returns reducedSize values chosen from valList -- project-local helper, TODO confirm
        y = OptimalSubsampleNew.optimalSubsample(valList, reducedSize)
        if (i[2] == "decr"):
            # valList is ascending; reverse so the kept prices fall across a decreasing interval.
            y = y[::-1]
        ylst.extend(y)
        # Evenly spaced normalized epochs across the interval, one per kept point.
        xinterval = (np.linspace(i[0], i[1], reducedSize))
        # NOTE(review): xinterval is already shifted by (minEpoch - 1); subtracting
        # epoch.min() again gives the row position in startInt only if the
        # smallest epoch is 1 and epochs are consecutive -- confirm.
        daylst.extend((startInt[(np.round(xinterval - epoch.min())).astype("int32")]).tolist())
        xlst.extend(xinterval)
xrarr = np.array(xlst)
yrarr = np.array(ylst)
dayarr = np.array(daylst)
# Sanity check: the three reduced vectors must line up element for element.
lengthsMatch = len(xlst) == len(ylst) == len(daylst)
if lengthsMatch:
    print("Number of Values in Reduced X and Y and day vectors are same.")
else:
    print("Number of Values in Reduced X and Y and day vectors are NOT the same. Please check the code for any errors.")

# Fit on the reduced data, then score the weights on the full training and test sets.
xmat2 = createFeatures(xrarr, dayarr)
redW = solveForMinW(xmat2, yrarr)
reducedTrainCost = costFunction(redW, createFeatures(xtrain, weekdayTrain), ytrain)
print("Reduced Data Training Cost: ", reducedTrainCost, sep="")
reducedTestCost = costFunction(redW, createFeatures(xtest, weekdayTest), ytest)
print("Reduced Data Testing Cost: ", reducedTestCost, sep="")

# Overlay the reduced-data fit and show the figure.
pred2 = xmat2.dot(redW)
reducedLabel = "Reduced by {}".format(round(reducePercent, 2))
plt.plot(xrarr, pred2, label=reducedLabel)
plt.legend(loc="best")
plt.show()