Skip to content

Commit

Permalink
2022-03-05-abnormal
Browse files Browse the repository at this point in the history
  • Loading branch information
Wei-1 committed Mar 5, 2022
1 parent 5b1c4f4 commit dfe2eda
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 5 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ A very light weight Scala machine learning library that provide some basic ML al

- [x] One Hot Encoding [[Code]](src/main/scala/algorithm/transformation/OneHot.scala) [[Usage]](src/test/scala/algorithm/transformation/OneHotTest.scala)

### Abnormal Detection :

- [x] Isolation Tree [[Code]](src/main/scala/algorithm/abnormal/IsolationTree.scala) [[Usage]](src/test/scala/algorithm/abnormal/IsolationTreeTest.scala)

- [x] Isolation Forest [[Code]](src/main/scala/algorithm/abnormal/IsolationForest.scala) [[Usage]](src/test/scala/algorithm/abnormal/IsolationForestTest.scala)


## TODO

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/algorithm/Algorithm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ trait Algorithm {
val algotype: String
val algoname: String
val version: String
def clear: Boolean
def clear(): Boolean
def config(paras: Map[String, Any]): Boolean
}
10 changes: 10 additions & 0 deletions src/main/scala/algorithm/abnormal/Abnormal.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Wei Chen - Abnormal Detection
// 2022-03-04

package com.scalaml.algorithm

/** Common contract for abnormal-detection algorithms.
  *
  * Implementations are fitted on unlabeled rows of `Double` features and then
  * produce one `Double` score per row handed to `predict`.
  */
trait Abnormal extends Algorithm {

  /** Category tag shared by every abnormal-detection algorithm. */
  override val algotype: String = "Abnormal"

  /** Fits the model on `data` (one inner array per sample).
    *
    * @return a Boolean success flag; what counts as failure is up to the implementation
    */
  def train(data: Array[Array[Double]]): Boolean

  /** Scores each row of `data`.
    *
    * @return one score per input row; the scale is defined by the implementation
    */
  def predict(data: Array[Array[Double]]): Array[Double]
}
57 changes: 57 additions & 0 deletions src/main/scala/algorithm/abnormal/IsolationForest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Wei Chen - Isolation Forest
// 2022-03-04

package com.scalaml.algorithm
import com.scalaml.general.MatrixFunc._

/** Isolation Forest: an ensemble of [[IsolationTree]]s whose per-row scores are averaged.
  *
  * Each tree is trained either on a random subsample of `sample_n` rows (when
  * more than `sample_n` rows are supplied) or on the full data set.
  */
class IsolationForest() extends Abnormal {
  val algoname: String = "IsolationForest"
  val version: String = "0.1"

  var trees = Array[IsolationTree]()
  var tree_n = 10 // Number of Trees
  var sample_n = 10 // Number of Sample Data in a Tree
  var maxLayer = 5

  /** Drops every trained tree and restores the default settings. Always returns true. */
  override def clear(): Boolean = {
    trees = Array[IsolationTree]()
    tree_n = 10
    sample_n = 10
    maxLayer = 5
    true
  }

  /** Reads the first key of `keys` present in `paras` as a Double and truncates it to Int.
    *
    * Falls back to `default` when none of the keys is present. A value that is
    * not a Double makes the cast throw, which `config` turns into `false`.
    */
  private def intPara(paras: Map[String, Any], default: Int, keys: String*): Int =
    keys.collectFirst { case key if paras.contains(key) => paras(key) }
      .getOrElse(default.toDouble)
      .asInstanceOf[Double]
      .toInt

  /** Applies settings from `paras`; values must be given as `Double`.
    *
    * Recognised keys (first match wins):
    *  - `TREE_NUMBER` / `tree_number` / `tree_n` (default 10)
    *  - `SAMPLE_NUMBER` / `sample_number` / `sample_n` (default 10)
    *  - `maxLayer` (default 5)
    *
    * @return true on success; false (after printing the exception to stderr)
    *         when a value cannot be read, in which case no setting is changed
    */
  override def config(paras: Map[String, Any]): Boolean = try {
    // Parse everything first so a bad entry cannot leave the settings half-applied.
    val newTreeN = intPara(paras, 10, "TREE_NUMBER", "tree_number", "tree_n")
    val newSampleN = intPara(paras, 10, "SAMPLE_NUMBER", "sample_number", "sample_n")
    val newMaxLayer = intPara(paras, 5, "maxLayer")
    tree_n = newTreeN
    sample_n = newSampleN
    maxLayer = newMaxLayer
    true
  } catch { case e: Exception =>
    Console.err.println(e)
    false
  }

  /** Returns `sample_n` rows of `data` chosen by a random shuffle (fewer if `data` is smaller). */
  private def randomSelect(data: Array[Array[Double]], sample_n: Int) =
    scala.util.Random.shuffle(data.toList).take(sample_n).toArray

  /** Trains one tree on `data` and appends it to `trees` only when both
    * its configuration and its training succeed.
    */
  private def addTree(data: Array[Array[Double]]): Boolean = {
    val itree = new IsolationTree()
    val paras: Map[String, Any] = Map("maxLayer" -> maxLayer.toDouble)
    val check = itree.config(paras) && itree.train(data)
    if (check) trees :+= itree
    check
  }

  /** Grows the forest from `data`.
    *
    * With more than `sample_n` rows, `tree_n` trees are trained on independent
    * random subsamples, stopping at the first tree that fails. Otherwise a
    * single tree is trained on all rows. New trees are appended to any trees
    * already held; call `clear()` first to start from scratch.
    *
    * @return true when every attempted tree trained successfully
    */
  override def train(data: Array[Array[Double]]): Boolean = {
    if (data.size > sample_n) {
      (0 until tree_n).forall(_ => addTree(randomSelect(data, sample_n)))
    } else addTree(data)
  }

  /** Averages the per-tree scores for every row of `data`.
    *
    * The divisor is the number of trees actually held, not the configured
    * `tree_n`: the two differ when a single tree was trained on a small data
    * set or when `train` was called more than once.
    * NOTE(review): `matrixaccumulate` is assumed to sum the per-tree score
    * arrays element-wise - confirm in MatrixFunc.
    */
  override def predict(data: Array[Array[Double]]): Array[Double] = {
    val treeCount = trees.size
    matrixaccumulate(trees.map(_.predict(data))).map(_ / treeCount)
  }
}
53 changes: 53 additions & 0 deletions src/main/scala/algorithm/abnormal/IsolationTree.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Wei Chen - Isolation Tree
// 2022-03-04

package com.scalaml.algorithm

/** A single isolation tree built from random axis-aligned splits.
  *
  * Rows are recursively partitioned on a randomly chosen column at a randomly
  * chosen threshold until a split no longer separates the rows or `maxLayer`
  * is reached.
  */
class IsolationTree() extends Abnormal {
  val algoname: String = "IsolationTree"
  val version: String = "0.1"

  var maxLayer = 5
  var tree: DecisionNode = null

  /** Restores the default `maxLayer` and discards any trained tree, so a
    * cleared instance holds no model. Always returns true.
    */
  override def clear(): Boolean = {
    maxLayer = 5
    tree = null
    true
  }

  /** Applies settings from `paras`. The only key read is `maxLayer`, which
    * must be a `Double` (default 5.0) and is truncated to Int.
    *
    * @return true on success; false (after printing the exception to stderr)
    *         when the value is not a Double
    */
  override def config(paras: Map[String, Any]): Boolean = try {
    maxLayer = paras.getOrElse("maxLayer", 5.0).asInstanceOf[Double].toInt
    true
  } catch { case e: Exception =>
    Console.err.println(e)
    false
  }

  /** Recursively builds the subtree for `data`, where `layer` is the current depth.
    *
    * A column is drawn at random (the column count is taken from the first
    * row) and a threshold is drawn uniformly between that column's min and
    * max. Rows with a value >= threshold go to the "true" branch, the rest to
    * the "false" branch. When either branch would be empty or `layer` has
    * reached `maxLayer`, a leaf carrying `layer` is returned instead.
    * NOTE(review): the leaf's fifth constructor argument is presumably what
    * `DecisionNode.predict` returns for rows that reach it - confirm against
    * DecisionNode.
    *
    * Throws on empty `data` or on rows shorter than the first row; `train`
    * catches this.
    */
  private def buildtree(data: Array[Array[Double]], layer: Int = 0): DecisionNode = {
    val columnSize: Int = data.head.size
    val col = scala.util.Random.nextInt(columnSize)
    val colData = data.map(d => d(col))
    val minV = colData.min
    val maxV = colData.max
    val value = (maxV - minV) * scala.util.Random.nextDouble() + minV
    val (tData, fData) = data.partition { d =>
      d(col) >= value
    }
    if (tData.size > 0 && fData.size > 0 && layer < maxLayer) {
      val tnode = buildtree(tData, layer + 1)
      val fnode = buildtree(fData, layer + 1)
      new DecisionNode(col, value, tnode, fnode)
    } else new DecisionNode(0, 0, null, null, layer)
  }

  /** Builds the tree from `data`, replacing any previous tree on success.
    *
    * @return true on success; false (after printing the exception to stderr)
    *         when building throws, e.g. for empty or ragged input
    */
  override def train(data: Array[Array[Double]]): Boolean = try {
    tree = buildtree(data)
    true
  } catch { case e: Exception =>
    Console.err.println(e)
    false
  }

  /** Scores every row of `x` with the trained tree.
    *
    * Must be called after a successful `train`: `tree` is null before
    * training and after `clear()`, so scoring any row then throws a
    * NullPointerException.
    */
  override def predict(x: Array[Array[Double]]): Array[Double] = x.map(xi => tree.predict(xi))
}
2 changes: 1 addition & 1 deletion src/test/scala/algorithm/AlgorithmTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class AlgorithmSuite extends AnyFunSuite {
class TestAlgo() extends TestType {
val algoname: String = "TestAlgo"
val version: String = "TestVersion"
override def clear: Boolean = true
override def clear(): Boolean = true
override def config(paras: Map[String, Any]): Boolean = true
override def testfunc(testinput: Int): Boolean = true
}
Expand Down
32 changes: 32 additions & 0 deletions src/test/scala/algorithm/abnormal/AbnormalTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Wei Chen - Abnormal Trait Test
// 2022-03-05

import com.scalaml.algorithm.Abnormal
import org.scalatest.funsuite.AnyFunSuite

/** Checks that a minimal implementation of the `Abnormal` trait can be
  * defined and that its members behave as declared.
  */
class AbnormalSuite extends AnyFunSuite {

  test("Abnormal Test : Create Sample Algo") {

    // Smallest possible implementation: every operation succeeds and every row scores 0.
    class TestAlgo() extends Abnormal {
      val algoname: String = "TestAlgo"
      val version: String = "TestVersion"
      override def clear(): Boolean = true
      override def config(paras: Map[String, Any]): Boolean = true
      override def train(data: Array[Array[Double]]): Boolean = true
      override def predict(data: Array[Array[Double]]): Array[Double] = data.map(_ => 0.0)
    }

    val ta = new TestAlgo

    assert(ta.algotype == "Abnormal")
    assert(ta.algoname == "TestAlgo")
    assert(ta.version == "TestVersion")
    // `clear` is declared with an empty parameter list, so call it with `()`
    // rather than relying on deprecated auto-application.
    assert(ta.clear())
    assert(ta.config(Map()))
    assert(ta.train(Array()))
    assert(ta.predict(Array()).size == 0)
    assert(ta.predict(Array(Array(1))).head == 0)
  }

}
31 changes: 31 additions & 0 deletions src/test/scala/algorithm/abnormal/IsolationForestTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Wei Chen - Isolation Forest Test
// 2022-03-05

import com.scalaml.TestData._
import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm.IsolationForest
import org.scalatest.funsuite.AnyFunSuite

/** Behavioural tests for `IsolationForest`. All tests share one instance and
  * call `clear()` first so that each starts from the default settings.
  */
class IsolationForestSuite extends AnyFunSuite {

  val iforest = new IsolationForest()

  test("IsolationForest Test : Clear") {
    val cleared = iforest.clear()
    assert(cleared)
  }

  test("IsolationForest Test : Abnormal Large Data") {
    assert(iforest.clear())
    assert(iforest.config(Map("tree_n" -> 100.0)))
    assert(iforest.train(UNLABELED_LARGE_DATA))

    val result = iforest.predict(UNLABELED_LARGE_DATA)
    val expected = UNLABELED_LARGE_DATA.map(_ => 1.0)
    // NOTE(review): the third argument is taken from UNLABELED_NONLINEAR_DATA
    // although the data under test is UNLABELED_LARGE_DATA - confirm this is intended.
    assert(arraysimilar(result, expected, UNLABELED_NONLINEAR_DATA.size))

    // The last row must score below the average score of all rows
    // (presumably it is the outlier of the data set - verify against TestData).
    val meanScore = result.sum / result.size
    assert(result.last < meanScore)
  }

  test("IsolationForest Test : Invalid Data") {
    assert(iforest.clear())
    // A non-numeric setting must be rejected.
    assert(!iforest.config(Map("maxLayer" -> "test")))
    // Rows of different lengths must make training fail.
    assert(!iforest.train(Array(Array(1, 2), Array())))
  }
}
30 changes: 30 additions & 0 deletions src/test/scala/algorithm/abnormal/IsolationTreeTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Wei Chen - Isolation Tree Test
// 2022-03-05

import com.scalaml.TestData._
import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm.IsolationTree
import org.scalatest.funsuite.AnyFunSuite

/** Behavioural tests for `IsolationTree`. All tests share one instance and
  * call `clear()` first so that each starts from the default settings.
  */
class IsolationTreeSuite extends AnyFunSuite {

  val itree = new IsolationTree()

  test("IsolationTree Test : Clear") {
    val cleared = itree.clear()
    assert(cleared)
  }

  test("IsolationTree Test : Abnormal Large Data") {
    assert(itree.clear())
    // An empty parameter map must be accepted and leave the defaults in place.
    assert(itree.config(Map[String, Double]()))
    assert(itree.train(UNLABELED_LARGE_DATA))

    val result = itree.predict(UNLABELED_LARGE_DATA)
    val expected = UNLABELED_LARGE_DATA.map(_ => 1.0)
    // NOTE(review): the third argument is taken from UNLABELED_NONLINEAR_DATA
    // although the data under test is UNLABELED_LARGE_DATA - confirm this is intended.
    assert(arraysimilar(result, expected, UNLABELED_NONLINEAR_DATA.size))
  }

  test("IsolationTree Test : Invalid Data") {
    assert(itree.clear())
    // A non-numeric setting must be rejected.
    assert(!itree.config(Map("maxLayer" -> "test")))
    // Rows of different lengths must make training fail.
    assert(!itree.train(Array(Array(1, 2), Array())))
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ClassificationSuite extends AnyFunSuite {
class TestAlgo() extends Classification {
val algoname: String = "TestAlgo"
val version: String = "TestVersion"
override def clear: Boolean = true
override def clear(): Boolean = true
override def config(paras: Map[String, Any]): Boolean = true
override def train(data: Array[(Int, Array[Double])]): Boolean = true
override def predict(data: Array[Array[Double]]): Array[Int] = data.map(_ => 0)
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/algorithm/clustering/ClusteringTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ClusteringSuite extends AnyFunSuite {
class TestAlgo() extends Clustering {
val algoname: String = "TestAlgo"
val version: String = "TestVersion"
override def clear: Boolean = true
override def clear(): Boolean = true
override def config(paras: Map[String, Any]): Boolean = true
override def cluster(data: Array[Array[Double]]): Array[Int] = data.map(_ => 0)
}
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/algorithm/regression/RegressionTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class RegressionSuite extends AnyFunSuite {
class TestAlgo() extends Regression {
val algoname: String = "TestAlgo"
val version: String = "TestVersion"
override def clear: Boolean = true
override def clear(): Boolean = true
override def config(paras: Map[String, Any]): Boolean = true
override def train(data: Array[(Double, Array[Double])]): Boolean = true
override def predict(data: Array[Array[Double]]): Array[Double] = data.map(_ => 0.0)
Expand Down

0 comments on commit dfe2eda

Please sign in to comment.