-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhistogram.py
144 lines (136 loc) · 6.17 KB
/
histogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import csv
import requests
import random
import time
import sys
import pandas as pd
import matplotlib.pyplot as plt
def plotfunc(validatorindex, plot=1, retryCall=5):
    """Return a validator's average inclusion distance, optionally marking it on the plot.

    Fetches the attestation list for ``validatorindex`` from the beaconcha.in
    API and averages ``inclusionslot - attesterslot`` over the records whose
    ``status`` field equals 1 (presumably the attestations that were actually
    included -- confirm against the API documentation).

    Args:
        validatorindex: Identifier substituted into the attestations URL.
        plot: Truthy to draw a vertical line at the average on the current
            matplotlib axes; falsy to only compute the value.
        retryCall: How many times a failed fetch/parse is retried before
            giving up. ``float("inf")`` retries forever.

    Returns:
        The average inclusion distance, or -1 when no record has status 1 or
        when every attempt failed.
    """
    requrl = "https://beaconcha.in/api/v1/validator/{}/attestations".format(validatorindex)
    tries = 0
    while True:
        # Start every attempt from zero: a response that fails half-way
        # through must not leave partial sums behind for the next attempt.
        totalIncDist = 0
        numepochs = 0
        try:
            # The request lives inside the retry loop; retrying the parse of
            # the same broken payload could never succeed.
            outputJson = requests.get(requrl, timeout=30).json()
            for record in outputJson["data"]:
                if record["status"] == 1:
                    totalIncDist += int(record["inclusionslot"]) - int(record["attesterslot"])
                    numepochs += 1
        except (requests.RequestException, ValueError, KeyError, TypeError):
            if tries >= retryCall:
                return -1
            tries += 1
        else:
            break

    if not numepochs:
        return -1
    average = totalIncDist / numepochs
    if plot:
        plt.axvline(x=average)
    return average
def avgInclusionDistance(reqURL, retryCall):
    """Return the average inclusion distance reported by an attestations URL.

    Downloads ``reqURL``, and averages ``inclusionslot - attesterslot`` over
    the entries of the response's ``"data"`` list whose ``status`` equals 1.

    Args:
        reqURL: Full URL of the attestations endpoint to query.
        retryCall: Number of retries allowed after a failed attempt; pass
            ``float("inf")`` to keep trying until a response parses.

    Returns:
        The average inclusion distance, or -1 when the retries are exhausted
        or the response contains no entry with status 1.
    """
    tries = 0
    while True:
        # Reset per attempt so a failure in the middle of a payload cannot
        # leak partial sums into the retry and inflate the average.
        totalID = 0
        epochs = 0
        try:
            # A timeout turns a stalled connection into a retryable error
            # instead of blocking the whole run indefinitely.
            output = requests.get(reqURL, timeout=30).json()
            for entry in output["data"]:
                if entry["status"] == 1:
                    attesterslot = int(entry["attesterslot"])
                    inclusionslot = int(entry["inclusionslot"])
                    totalID += inclusionslot - attesterslot
                    epochs += 1
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, undecodable JSON, or an unexpected payload shape.
            if tries >= retryCall:
                return -1
            tries += 1
        else:
            # -1 signals "no usable entries" to the callers.
            return totalID / epochs if epochs else -1
def plotHistogram(chooseVal, validatorIndex, retryCall=5):
    """Plot a histogram of average inclusion distances and rank one validator in it.

    Reads ``indices.csv`` (column 0 is compared against ``validatorIndex``,
    column 1 is the identifier used in the API URL), samples validators at
    random without replacement until ``chooseVal`` of them have yielded an
    average inclusion distance, and draws a histogram of those averages. The
    validator given by ``validatorIndex`` is added to the data if it was not
    already sampled, marked on the plot via ``plotfunc``, and its percentile
    among the collected values is printed.

    Args:
        chooseVal: Number of randomly chosen validators wanted in the sample.
        validatorIndex: Index (as a string matching column 0 of the CSV) of
            the validator to compare against the sample.
        retryCall: Retries allowed per sampled validator before it is skipped.
    """
    column = "Inclusion Distance"
    urlTemplate = "https://beaconcha.in/api/v1/validator/%s/attestations"

    pool = []  # identifiers (CSV column 1) still available for sampling
    validatorID = None  # stays None when validatorIndex is not in the CSV
    with open("indices.csv", "r", newline="") as fobj:
        for row in csv.reader(fobj):
            if len(row) < 2:
                continue  # blank or malformed line
            pool.append(row[1])
            if row[0] == validatorIndex:
                validatorID = row[1]

    # Shuffling once and popping from the end draws without replacement and
    # lets "pool is empty" be the single, unambiguous exhaustion test.
    random.shuffle(pool)
    distances = []
    workingVal = 0  # sampled validators that produced a usable average
    while workingVal < chooseVal and pool:
        print("%i" % (workingVal + 1))
        avgInclDist = avgInclusionDistance(urlTemplate % pool.pop(), retryCall)
        if avgInclDist != -1:
            distances.append(avgInclDist)
            workingVal += 1
    if workingVal < chooseVal:
        print("You are out of validators to pick; either reduce the number of people you want to choose, increase the number of times you want to retry a call, or if you are sure neither of those is the problem, just run the program again.")

    # Add the validator of interest unless the random draw already covered it.
    if validatorID is not None and validatorID in pool:
        # Keep retrying until the API answers for this validator.
        ownDistance = avgInclusionDistance(urlTemplate % validatorIndex, float("inf"))
        if ownDistance != -1:  # never put the "no data" sentinel in the histogram
            distances.append(ownDistance)

    if distances:
        # pandas refuses to plot an empty column, so only draw when there is data.
        df = pd.DataFrame({column: distances})
        df[column].plot.hist(bins=10)
    plt.xlabel("Inclusion Distance")
    plt.title("Histogram of Inclusion Distance For Random %i Validators" % workingVal)
    plt.ylim(0, workingVal + 1)
    avgvalue = plotfunc(validatorIndex)
    print("Average Inclusion Distance of Validator %s: " % validatorIndex + str(avgvalue))
    plt.show()

    if avgvalue != -1 and distances:
        numAbove = sum(1 for d in distances if avgvalue < d)
        numEqorAbove = numAbove + sum(1 for d in distances if avgvalue == d)
        # Mid-rank percentile: mean of the "strictly higher" and "higher or
        # equal" shares. Dividing by the number of values actually compared
        # keeps the result within 0-100 even when fewer validators than
        # requested responded.
        avgpercentiles = (((numAbove + numEqorAbove) / 2) / len(distances)) * 100
        print("Validator {} is in the {}th percentile of {} randomly chosen people.".format(validatorIndex, round(avgpercentiles, 2), workingVal))
def main():
    """Command-line entry point.

    Expects ``sys.argv`` to hold the number of random validators to graph,
    the index of the validator to compare, and optionally the number of
    retries per API call. Prints a message and exits with status 1 when the
    arguments are missing, non-integer, or out of range; otherwise calls
    ``plotHistogram``.
    """
    # Hard-coded upper bound on validator indices, carried over unchanged
    # (presumably the number of rows in indices.csv -- confirm).
    maxValidators = 87750

    arglen = len(sys.argv)
    if arglen < 3 or arglen > 4:
        print("Your 1st parameter is the number of other validators you want to graph and the 2nd is the index of the validator you want to compare. The optional third parameter is the number of tries.")
        sys.exit(1)

    try:
        numberOfRandVals = int(sys.argv[1])
        validatorNumber = int(sys.argv[2])
        call = int(sys.argv[3]) if arglen == 4 else None
    except ValueError:
        print("Make sure all your parameters are integers!")
        sys.exit(1)

    # Explicit checks instead of assert: assertions vanish under "python -O".
    if not 0 <= numberOfRandVals <= maxValidators:
        print("The number of validators must be between 0 and %i." % maxValidators)
        sys.exit(1)
    if not 0 <= validatorNumber <= maxValidators - 1:
        print("The validator index must be between 0 and %i." % (maxValidators - 1))
        sys.exit(1)
    if call is not None and call < 1:
        print("The number of tries must be at least 1.")
        sys.exit(1)

    # plotHistogram compares the index with CSV text, so pass a canonical
    # decimal string ("007" or "+7" becomes "7").
    valIndex = str(validatorNumber)
    if call is None:
        plotHistogram(numberOfRandVals, valIndex)
    else:
        plotHistogram(numberOfRandVals, valIndex, call)
# Run the CLI only when executed as a script, so importing this module
# does not trigger network calls or open a plot window.
if __name__ == "__main__":
    main()