From 6095ecceac8a29d72eff0495da26a370dd2dae96 Mon Sep 17 00:00:00 2001 From: MarcusGitAccount Date: Fri, 22 Feb 2019 23:55:40 +0200 Subject: [PATCH] tests --- .vscode/launch.json | 70 +++++++++++++++++++++++++++ .vscode/settings.json | 3 ++ __pycache__/ball_tree.cpython-36.pyc | Bin 0 -> 2710 bytes __pycache__/heap.cpython-36.pyc | Bin 0 -> 3324 bytes ball_tree.py | 38 +++++++++------ heap.py | 36 +++++++++++--- tests.py | 46 ++++++++++++++++++ 7 files changed, 171 insertions(+), 22 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 __pycache__/ball_tree.cpython-36.pyc create mode 100644 __pycache__/heap.cpython-36.pyc create mode 100644 tests.py diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..d388962 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,70 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File (Integrated Terminal)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + { + "name": "Python: Remote Attach", + "type": "python", + "request": "attach", + "port": 5678, + "host": "localhost", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "." + } + ] + }, + { + "name": "Python: Module", + "type": "python", + "request": "launch", + "module": "enter-your-module-name-here", + "console": "integratedTerminal" + }, + { + "name": "Python: Django", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/manage.py", + "console": "integratedTerminal", + "args": [ + "runserver", + "--noreload", + "--nothreading" + ], + "django": true + }, + { + "name": "Python: Flask", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "app.py" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload" + ], + "jinja": true + }, + { + "name": "Python: Current File (External Terminal)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "externalTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..3a5001f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\Users\\pop_m\\Anaconda3\\python.exe" +} \ No newline at end of file diff --git a/__pycache__/ball_tree.cpython-36.pyc b/__pycache__/ball_tree.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f720afeca2ae6b9916627b76c6c6397f197efac GIT binary patch literal 2710 zcmZ`*TaVjB6rSlyc7_MRx)1C?AD26yW>ge zuCgzrh<&NV58w~*m*$nH{so@+&N!QH3)q^OGuLzb&Y5|?-;d9#djH3mvA@`bPk_FQ z(mX~bndAkVaMyZMG4VlpMKBL1A*WeUbmq|{;_QeWaq0iTWgx?+{-i5AGJ47;@z+dt zW&D)Mc;!vDtFpoHWZ+h$MbcE-C~1Sdl(EbEjVe@Tl7r)9!=9KlAu7g)yvKDPROV41AGI|1k{w_R4@IE(WRsOA zQC56KmiETaX%lp%eb#RtX=6pRnA@PPepcE>&Eml5G&3*}Pc1)iYPMC6w)E8^D{`64 zmC<=N#8BhJ;rcbu8Or7(cily4c2Ow@tBI$?5eIZQpyNpa==jo?0iXaN$qw33Mi8`X zqc32f?_i%1caGV3zk}1rQ*?Z@1GmEOY!>OE0eV;b7aV-X6O8!`Ezv>}1uUZuTpgOu zG{QcJ@QhY;O$en ztLo$D6d^85zRHNCk6N0N3-zVem7a9+CMi^U zsBho^8|qZ%i$-^;;}`1C*g)q;4^4~kAk{iOUPpGJN*f~P=jjtG+A1$yNZDap*Tr#? z7FBtaP>Rs6ui7|Iq7!_PqfQ{q0}?`X=ZBN{o39Xp;fRH#(dn}#TzCGrC0b@+9J6)ks+H$+3PLwDk}3q8LT`7CNm7 z!+W{T3gt}St`SGsp+%M2IF&M?Y}%NSaQa(O7X|=n_mFiifWfbe%8$xVKE_DoJddB=brx3Zi>S@@SD3>zUxNs8XXx zI7Pol6`kDL;uV&JIu~q|pCf){Ckh-AHy)>COI~-TxC>$l|INC3&t<``-holt5u!L{ zH5jKw0q%h{?bE%mHWQ*aXpKUm**?<$V_ay;T~061J@O`9x~`J4l1jger56++pzDuN zeU8%LjRA8(dL9G8ZhVuyNp{LAPJXhhLmwZv!6U3s$J@{#OmV|d%)P?KZY8s_Ocu3F zjrs=j=l9PZ=*)Qv1hxOq#@8QXUVnnh^4WiK={YXAu5taVZ?W*4%^lDg#T0~c&W(uBD--2{&oYN1}WKuiX} zdqQUq$5^@!pdRe4$^Mj=hc+k|^ZK~Wf|uv1p@71x1g{OE@TPLDbENGaE=oK~s-n?^ zFYDvcQfpGoWk+|lI~Y1&YO=H@J2-Y1q;p_ylipq%3U|l}4Q0NF$CT#6VGxPIZrq7P GB>n-IR!A8D literal 0 HcmV?d00001 diff --git a/__pycache__/heap.cpython-36.pyc b/__pycache__/heap.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8d07ff5b0c13d6d72c356b52a157f47995c6ffea GIT binary patch literal 3324 zcma)8OK;mo5Z+yq5+zZOBu-+dMrx#O3blw4w3i?-90yI&99ki0&;$@15R7(h)7FDt z(uHhLmnw%MK>tNA{R=(xhw$2yfgXC!soyLmS~2Xl686E_$IN{5+Ar(%st%*=pR0`h z$rgSN=38j--{>Thyw3)_%Q^doNg=H_Oj=XX70QzKfzW?RN0#2OuC0W!r@ZToZ!3;d zW$8|>C+tt2c<+BC})F-_d38{f+4g zhId=Nhuc5kY`h(fB7d;`M8yZmDBA9auXUh96>smUKqvQP7^@&w_ad!i*h@y5=+Sy~ zoVmUq4#ULv$s6MmI>uYvhE**Nf_DKvz|H2OnY87eUuP-p1 zE0?caY+n+P_@#V>I6}D@A7S*$oNy@B_`G^G-``b9n5cp8w-z|IW-1no7pqA}ayKW| zPvDARZFP#x1Q@lL+EsKZdsUtAl)r#w4u$bJ-Z4VK7GnK=W|J)Eq|||j@&lnfLQ{tgoWq{Y1K#>{XoFxQ4LYO>zBF5jRLW%=h_d&H%06!rEuoQqB z2w4jFV3^z#sF?9yO0a?sfQXL0m{}8XxcZD8uD;B{vO2lrQpmI&16EcFqDT#84jP4r zL9SiO@uI|Gd^_{f(!r!BlLjpJ8VQMMOjgweZXE|_qj*Rg<*B`-TRJU_& z$t0PDdT8w1w(@Lk_XE^aXD8Hga!xh zvy)?z@MP!%2!@iM3&f2;m()QwjD0nTlH>1)GhJ3>%lS&-Wfa;4yG?w69fdUfD-}K` zKO^Rk*u)y&PAzzuqaUOKUS=7zfZ7ABUF`NT=Xcto+O)4oDaj$trRzMoZne=w3qeVTi|e|Qx1^A%mgHJU26 zZcs;MQB$eX#+T1B1IemCqK=HHKSh^$aS|k9Z=jOBk<_0NogPaI9Tg9wHEABx9jg1s z8(r5LzSahq%!xE!*O^Ukog5_!e0}Aa8D`O>+GBE;>&%UbuKC Wy8J4F#lz?!9ni~h34d1@3jYT$wt6Q3 literal 0 HcmV?d00001 diff --git a/ball_tree.py b/ball_tree.py index 1523531..28e234c 100644 --- a/ball_tree.py +++ b/ball_tree.py @@ -1,6 +1,8 @@ import numpy as np import matplotlib.pyplot as plt + from functools import cmp_to_key +from heap import Heap # Introselect is a hybrid algorithm, combining both quickselect # and median of medians @@ -18,16 +20,19 @@ class BallTree: def __init__(self, points: [[float]], metric): if points is None: raise ValueError('Dataset not provided.') + self.is_leaf = True self.center = self.radius = None self.dimension = None self.left = None self.right = None self.points = np.array(points, copy=True) - if len(points) <= 1: + if len(points) == 1: + self.center = self.points[0] return None mid = len(self.points) >> 1 + self.is_leaf = False # Computing the dimension of the greatest spread, i.e. # the dimension of points from the dataset that # spread over the largest interval @@ -39,8 +44,14 @@ def __init__(self, points: [[float]], metric): center_index = introselect_by_dimension(points, mid, self.dimension) self.center = self.points[center_index] self.radius = np.apply_along_axis(lambda point: metric(self.center, point), 1, self.points).max(0) - self.left = BallTree(self.points[:mid], metric) - self.right = BallTree(self.points[mid:], metric) + + left = self.points[:mid] + right = self.points[mid:] + + if len(left) != 0: + self.left = BallTree(left, metric) + if len(right) != 0: + self.right = BallTree(right, metric) def plot(self, plt): if len(self.points) > 1: @@ -58,18 +69,13 @@ def traverse_tree(tree_node, plt=None): traverse_tree(tree_node.left, plt) traverse_tree(tree_node.right, plt) -points = np.random.rand(100, 2) * 10000 - -plt.rcParams["font.size"] = 1 -x = points[:, 0] -y = points[:, 1] - -np.random.randint() - -plt.scatter(x, y) -plt.show() - -tree = BallTree(points, euclid_metric) - +def _knn_update(node, target, metric): + pass +def _knn_prepare(node, target, k, metric): + pass +def knn_search(node, target, k, metric, queue): + if len(queue) != 0: + if metric(node.pivot, target) > metric(): + pass \ No newline at end of file diff --git a/heap.py b/heap.py index 57225c4..1947d3b 100644 --- a/heap.py +++ b/heap.py @@ -13,6 +13,12 @@ def __len__(self): def __repr__(self): return 'Heap: %s' % self.container + def __getitem__(self, index): + return self.container[index] + + def __iter__(self): + return (item for item in self.container) + def _parent(self, index): return (index - 1) >> 1 @@ -68,17 +74,35 @@ def pop(self): return last return None - def make_heap(self): - index = len(self.container) >> 1 - while index > 0: - self._heapify(index) - index -= 1 + def is_empty(self): + return len(self) == 0 + @staticmethod + def make_heap(array, cmp): + heap = Heap(cmp) + heap.container = array + index = len(array) >> 1 + while index >= 0: + heap._heapify(index) + index -= 1 if __name__ == '__main__': - heap = Heap(cmp = lambda parent, child: parent < child) + cmp = lambda parent, child: parent < child + heap = Heap(cmp) for nbr in randint(0, 30, 5): heap.push(nbr) print(repr(heap)) heap.pop() print(repr(heap)) + + for item in heap: + print(item, end=' ') + print('') + for i in range(0, len(heap)): + print(heap[i], end=' ') + print('') + + arr = randint(0, 25, 15) + print(arr) + Heap.make_heap(arr, cmp) + print(arr) diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..85110b2 --- /dev/null +++ b/tests.py @@ -0,0 +1,46 @@ +import numpy as np +import matplotlib.pyplot as plt + +from functools import cmp_to_key +from heap import Heap +from ball_tree import BallTree, euclid_metric + +if __name__ == '__main__': + plt.title = 'KNN search.' + points = np.random.randint(100, size=(20, 2)) + + plt.rcParams["font.size"] = 1 + x = points[:, 0] + y = points[:, 1] + plt.scatter(x, y) + + tree = BallTree(points, euclid_metric) + point = np.random.randint(0, 100, 2) + x_, y_ = point + plt.plot(x_, y_, 'bo', color='red') + + distances = sorted([euclid_metric(point, candidate) for candidate in points]) + s = set(distances) + + k = 10 + cmp = lambda a, b: a[1] > b[1] + heap = Heap(cmp) + for candidate in points: + distance = euclid_metric(point, candidate) + if len(heap) < k or distance < heap[0][1]: + heap.push((candidate, distance)) + if len(heap) > k: + heap.pop() + for candidate in heap: + print(candidate) + x_, y_ = candidate[0] + plt.plot(x_, y_, 'bo', color='pink') + + print(distances[:k]) + all = True + for candidate in heap: + if not candidate[1] in s: + all = False + break + print('All? %s' % all) + plt.show() \ No newline at end of file