Skip to content

Commit

Permalink
2022-12-27-gradient-boost
Browse files Browse the repository at this point in the history
  • Loading branch information
Wei-1 committed Dec 27, 2022
1 parent 2b91a7d commit 27e4ba5
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 3 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ A very lightweight Scala machine learning library that provides some basic ML al

- Scala 2.13

- Sbt 1.2
- Sbt 2.1


## This package includes
Expand Down Expand Up @@ -54,6 +54,8 @@ A very lightweight Scala machine learning library that provides some basic ML al

- [x] Stochastic Gradient Decent [[Code]](src/main/scala/algorithm/regression/MultivariateLinearRegression.scala) [[Usage]](src/test/scala/algorithm/regression/MultivariateLinearRegressionTest.scala)

- [x] Gradient Boost [[Code]](src/main/scala/algorithm/regression/GradientBoost.scala) [[Usage]](src/test/scala/algorithm/regression/GradientBoostTest.scala)

### Clustering :

- [x] Hierarchical [[Code]](src/main/scala/algorithm/clustering/Hierarchical.scala) [[Usage]](src/test/scala/algorithm/clustering/HierarchicalTest.scala)
Expand Down
41 changes: 41 additions & 0 deletions src/main/scala/algorithm/regression/GradientBoost.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Wei Chen - Gradient Boost
// 2022-12-27

package com.scalaml.algorithm
import com.scalaml.general.MatrixFunc._

class GradientBoost() extends Regression {
    val algoname: String = "GradientBoost"
    val version: String = "0.1"

    // Weak learners applied in sequence; each one is trained on the
    // residual-adjusted targets left behind by its predecessors.
    var regressors = Array[Regression]()

    /** Reset the ensemble to an empty state. Always returns true. */
    override def clear(): Boolean = {
        regressors = Array[Regression]()
        true
    }

    /** Configure the ensemble from `paras`.
      *
      * Accepts the key "REGRESSORS" (checked first) or "regressors"; the value
      * must be an Array[Regression]. When neither key is present, defaults to a
      * single StochasticGradientDecent. Returns false (after logging) if the
      * supplied value cannot be cast to Array[Regression].
      */
    override def config(paras: Map[String, Any]): Boolean = try {
        regressors = paras.getOrElse("REGRESSORS",
            paras.getOrElse("regressors", Array(new StochasticGradientDecent): Any)
        ).asInstanceOf[Array[Regression]]
        true
    } catch { case e: Exception =>
        Console.err.println(e)
        false
    }

    /** Train every regressor in order on residual-shifted targets.
      *
      * The first learner sees the raw targets (residue starts at zero); each
      * subsequent learner sees `y + residue`, where residue is the previous
      * learner's prediction error on the training set. Returns true only if
      * the ensemble is non-empty and every learner trains successfully.
      */
    override def train(data: Array[(Double, Array[Double])]): Boolean = {
        var check = regressors.size > 0
        var residue = Array.fill(data.size)(0.0)
        for (regressor <- regressors) {
            val tmpdata = data.zip(residue).map { case ((y, x), r) => (y + r, x) }
            check &= regressor.train(tmpdata)
            residue = arrayminus(data.map(_._1), regressor.predict(data.map(_._2)))
        }
        check
    }

    /** Predict by averaging the per-learner predictions for each row.
      *
      * Guarded against an unconfigured ensemble: the original code would throw
      * from `reduceLeft` on an empty array (and then divide by zero), so an
      * empty ensemble now yields all-zero predictions instead of crashing.
      */
    override def predict(data: Array[Array[Double]]): Array[Double] =
        if (regressors.isEmpty) Array.fill(data.size)(0.0)
        else {
            val results = regressors.map(regressor => regressor.predict(data))
            matrixaccumulate(results).map(_ / regressors.size)
        }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class StochasticGradientDecent() extends Regression {
false
}

// --- Start Multivariate Linear Regression Function ---
// --- Start Stochastic Gradient Decent Function ---
override def train(
data: Array[(Double, Array[Double])] // Data Array(yi, xi)
): Boolean = try { // Return PData Class
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/general/MatrixFunc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ package object MatrixFunc {
// True when every zipped pair of rows agrees element-wise under equalfunc.
// (zip truncates, so rows/columns beyond the shorter operand are ignored.)
def matrixequal(x: Array[Array[Double]], y: Array[Array[Double]]): Boolean = {
    val pairedRows = x.zip(y)
    pairedRows.forall { case (rowA, rowB) => rowA.zip(rowB).forall(equalfunc) }
}
// Element-wise sum of all rows, folding the matrix down to a single vector.
// NOTE: reduceLeft throws on an empty input matrix — callers must guard.
// (This span held both the pre- and post-change diff lines; collapsed to the
// corrected version whose lambda parameters no longer shadow `x`.)
def matrixaccumulate(x: Array[Array[Double]]): Array[Double] =
    x.reduceLeft((acc, row) => acc.zip(row).map(sumfunc))
// Element-wise sum of two matrices (zip truncates to the shorter operand).
def matrixsum(x: Array[Array[Double]], y: Array[Array[Double]]): Array[Array[Double]] =
    for ((rowA, rowB) <- x.zip(y)) yield rowA.zip(rowB).map(sumfunc)
def matrixminus(x: Array[Array[Double]], y: Array[Array[Double]]): Array[Array[Double]] =
Expand Down
55 changes: 55 additions & 0 deletions src/test/scala/algorithm/regression/GradientBoostTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Wei Chen - Gradient Boost Test
// 2022-12-27

import com.scalaml.TestData._
import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm._
import org.scalatest.funsuite.AnyFunSuite

// Exercises the GradientBoost ensemble on the shared linear and nonlinear
// fixtures: it should fit linear data, fail (by design) on nonlinear data
// with linear base learners, and reject malformed configuration/input.
class GradientBoostSuite extends AnyFunSuite {

    val model = new GradientBoost()

    test("GradientBoost Test : Clear") {
        assert(model.clear())
    }

    test("GradientBoost Test : Linear Data") {
        assert(model.clear())
        assert(model.config(Map[String, Any]()))
        val trainSet = LABELED_LINEAR_DATA.map { case (y, x) => (y.toDouble, x) }
        assert(model.train(trainSet))
        val predictions = model.predict(UNLABELED_LINEAR_DATA)
        val signs = predictions.map(p => if (p > 0) 1.0 else -1.0)
        assert(arraysimilar(signs, LABEL_LINEAR_DATA.map(_.toDouble), 0.9))
    }

    test("GradientBoost Test : Nonlinear Data, 1 Linear Model - WRONG") {
        assert(model.clear())
        assert(model.config(Map[String, Any]()))
        val trainSet = LABELED_NONLINEAR_DATA.map { case (y, x) => (y.toDouble, x) }
        assert(model.train(trainSet))
        val predictions = model.predict(UNLABELED_NONLINEAR_DATA)
        assert(!arraysimilar(predictions, LABEL_NONLINEAR_DATA.map(_.toDouble), 0.45))
    }

    // Stacking more linear learners still cannot capture a nonlinear boundary.
    test("GradientBoost Test : Nonlinear Data, 5 Linear Models - WRONG") {
        val ensemble: Any = Array.fill(5)(new StochasticGradientDecent)
        assert(model.clear())
        assert(model.config(Map("regressors" -> ensemble)))
        val trainSet = LABELED_NONLINEAR_DATA.map { case (y, x) => (y.toDouble, x) }
        assert(model.train(trainSet))
        val predictions = model.predict(UNLABELED_NONLINEAR_DATA)
        assert(!arraysimilar(predictions, LABEL_NONLINEAR_DATA.map(_.toDouble), 0.45))
    }

    test("GradientBoost Test : Invalid Config & Data") {
        assert(model.clear())
        assert(!model.config(Map("regressors" -> "test")))
        assert(!model.train(Array((1, Array(1, 2)), (1, Array()))))
    }
}

0 comments on commit 27e4ba5

Please sign in to comment.