-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreateCampaignCategoricalModeler.py
165 lines (127 loc) · 5.62 KB
/
createCampaignCategoricalModeler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# createCampaignCategoricalModeler provides definitions for:
# 1) data collection functions, used to obtain all currently existing data,
# a cost for collecting some specific set of experimental data, a specific
# set of new experimental data,
# 2) the types of dependent (measured) variables that can be returned,
# 3) the types and ranges of independent (experimental) variables to be explored,
# 4) the set of definitions for model construction functions,
# 5) the active learning function to be used, and,
# 6) the objective function to be optimized along with the termination criteria.
# The first argument to main() specifies how many independent variables to create.
# Useful library for handling iterators
import itertools
# Important for storing results and history of campaign runs on disk or on a remote server
from database import *
import datetime
# directories
import os
import util
# Arguments to main(), in positional order:
# Number of Independent Variables
# Number of Values per Independent Variable
# Number of Groups per Independent Variable
# Amount of Added Noise
# Number of Experiments per Round
# Whether to Use Active Learning (as opposed to random selection)
def main(*arg):
    """Build and return a configured categorical-modeling campaign object.

    The arguments are read positionally from ``arg``:

        arg[0]  NumberOfIvars        number of independent variables (must be 2)
        arg[1]  NumberPerIvar        number of values per independent variable
        arg[2]  NumberGroups         number of groups per independent variable
        arg[3]  AddedNoise           noise level passed to the ground-truth generator
        arg[4]  ExperimentsPerRound  number of experiments per round
        arg[5]  UseActiveLearning    stored as the campaign's ``flag``

    Returns:
        An instance of the ``campaign`` class defined inside this function.

    Raises:
        IndexError: if fewer than six arguments are supplied.
        ValueError: if ``NumberOfIvars`` is not 2.
    """
    NumberOfIvars = arg[0]
    NumberPerIvar = arg[1]
    NumberGroups = arg[2]
    AddedNoise = arg[3]
    ExperimentsPerRound = arg[4]
    UseActiveLearning = arg[5]

    class campaign:
        """Container for the callbacks and settings of one campaign."""

        def initAvars(self):
            return

        # NOTE: each callback below imports a function that is also called
        # ``main``; the import is local, so it only shadows the enclosing
        # ``main`` inside that one method.

        # this goal function stops when min confidence is >0.9 and
        # R2 is greater than 0.9
        def goalTether(self, arg):
            from tetherCat import main
            return main(arg)

        # this modeler fits a row/column cluster model and estimates
        # confidence in predictions from distance to nearest sampled point
        def modelData(self, *arg):
            from catModel11 import main
            return main(*arg)

        # this active learner chooses randomly among the points
        # with estimated accuracy below 0.9 and stops when there are
        # not enough points to fill a batch
        def activeLearner(self, arg):
            from actLearn_cat import main
            return main(arg)

        def fetchData(self, arg1, arg2):
            from getGroundTruthData import main
            return main(arg1, arg2)

        # Declared static because it takes no ``self``: without the decorator
        # calling it on the returned campaign instance raises TypeError.
        @staticmethod
        def provideNames():
            """Print the independent-variable names and categories."""
            print("iVarNames:")
            print(campaign.ESS.iVarNames)
            print("iVarCategories:")
            print(campaign.ESS.iVarCategories)

        class ESS:
            # define data specifications
            if NumberOfIvars != 2:
                raise ValueError("Number of Independent Variables must be 2:" + str(NumberOfIvars))
            # trailing comma creates tuple of tuples
            iVars = (('int', 0, NumberPerIvar - 1),) * NumberOfIvars
            iVarNames = ["iVar" + str(i) for i in range(NumberOfIvars)]
            iVarCategories = [()] * NumberOfIvars
            dVars = [('int', 1, )]
            listTuples = util.createTuples(iVars)
            # number of values along each independent variable (upper - lower + 1)
            dimarr = [spec[2] - spec[1] + 1 for spec in iVars]

        # constraints on data collection function(s), e.g. QC
        '''
        class acquisitionOptions:
        replicates = [] # 4. Data acquisition function
        tolerance = [] # 4. Data acquisition function
        cost = [0] # 4. Data acquisition function
        getcurrent = [] # 5. Data access object
        '''

        # all data is initialized as np.nan inside database.py
        data = Database(ESS.dimarr,
                        'catmodel_test_' + str(NumberOfIvars) + '_' +
                        str(datetime.datetime.now()), 0, reset=True)
        confCount = ExperimentsPerRound
        NGroups = [NumberGroups] * NumberOfIvars
        groundTruth = generate_ground_truth_categorical(ESS.listTuples, NGroups, AddedNoise)

        # do not use pairwise accuracy, because we will have ground truth;
        # otherwise, accuracy assessment used when
        # data is unlabeled and classes are assigned arbitrarily round to round
        pairwise = False

        # active learning (as opposed to random)
        flag = UseActiveLearning
        hasClassifier = False

        # Clustering model only (Conf1) or Clustering model plus Mode (Conf2)
        confAssignmentChoice = 'Conf1'

        # simulation?
        simsFlag = True

        class plotting:
            # NOTE(review): filename/intDir are kept as attributes of
            # ``plotting`` -- confirm against the code that reads them.
            title = "Accuracy as Fraction of Experimental Space Coverage Increased"
            xlabel = "Experimental Space Coverage"
            ylabel = "Accuracy"
            filename = "BioActive_LinReCatModelg("+str(NumberOfIvars) + "iVar)Campaign_batchsize" + str(ExperimentsPerRound) + ".tif"
            # output directory is stamped with the current time
            intDir = 'simsDirectory_' + str(datetime.datetime.now()) + '/'
            filename = intDir + filename

    C = campaign()
    return C
def generate_ground_truth_categorical(listTuples,nGroups,AddedNoise):
    """Create categorically structured ground-truth labels as a 1D array.

    Every point ``(x, y)`` taken from columns 0 and 1 of ``listTuples`` is
    labelled ``1 + x % nGroups[0] + y % nGroups[1]``.  When ``AddedNoise`` is
    non-zero, each point is independently selected with probability
    ``AddedNoise`` and its label is replaced by a label drawn uniformly from
    1 up to and including the largest noise-free label.

    Args:
        listTuples: 2D array-like with one row per point; only the first two
            columns are read.
        nGroups: sequence whose first two entries are the moduli applied to
            column 0 and column 1 respectively.
        AddedNoise: probability of replacing a label; 0 disables noise.

    Returns:
        A 1D float numpy array holding one label per row of ``listTuples``.
    """
    # Local import keeps the function usable even if ``np`` is not re-exported
    # by another module's star import.
    import numpy as np

    points = np.asarray(listTuples)
    xs = points[:, 0]
    ys = points[:, 1]

    # Float storage, one slot per point.
    allData = np.empty(len(xs))
    allData[:] = 1 + xs % nGroups[0] + ys % nGroups[1]

    if AddedNoise != 0 and allData.size:
        rng = np.random.default_rng()
        # np.where() would return a 1-tuple here; flatnonzero yields the index
        # array itself, so its length is the number of points to change.
        changeit = np.flatnonzero(rng.random(allData.size) < AddedNoise)
        # One independent draw per selected point.  endpoint=True keeps the
        # largest label reachable and avoids an error when there is only a
        # single category (low == high).
        allData[changeit] = rng.integers(low=1, high=int(allData.max()),
                                         size=changeit.size, endpoint=True)
    return allData