Skip to content

Commit

Permalink
sgd
Browse files Browse the repository at this point in the history
  • Loading branch information
Wei-1 committed Mar 8, 2020
1 parent 4fa3bd3 commit 1b467e6
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 9 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ A very light weight Scala machine learning library that provide some basic ML al

- [x] Multivariate Linear Regression - GD [[Code]](src/main/scala/algorithm/regression/MultivariateLinearRegression.scala) [[Usage]](src/test/scala/algorithm/regression/MultivariateLinearRegressionTest.scala)

- [x] Stochastic Gradient Descent [[Code]](src/main/scala/algorithm/regression/StochasticGradientDecent.scala) [[Usage]](src/test/scala/algorithm/regression/StochasticGradientDecentTest.scala)

### Clustering :

- [x] Hierarchical [[Code]](src/main/scala/algorithm/clustering/Hierarchical.scala) [[Usage]](src/test/scala/algorithm/clustering/HierarchicalTest.scala)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ class MultivariateLinearRegression() extends Regression {
val version: String = "0.1"

var weights = Array[Double]()
var bias = Array[Double]()
var limit = 1000 // for GD
var lr = 0.01 // for GD

Expand Down
62 changes: 62 additions & 0 deletions src/main/scala/algorithm/regression/StochasticGradientDecent.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Wei Chen - Stochastic Gradient Descent
// 2020-03-08

package com.scalaml.algorithm
import com.scalaml.general.MatrixFunc._

// Linear regression trained with mini-batch stochastic gradient descent.
// (Class name keeps the original "Decent" spelling for source compatibility.)
class StochasticGradientDecent() extends Regression {
    val algoname: String = "StochasticGradientDecent"
    val version: String = "0.1"

    var weights = Array[Double]() // learned weights; last entry is the bias term
    var limit = 1000              // number of mini-batch update iterations
    var batch = 10                // mini-batch size (samples per update)
    var lr = 0.01                 // learning rate

    /** Reset the learned weights. Always returns true. */
    override def clear(): Boolean = {
        weights = Array[Double]()
        true
    }

    /** Read LIMIT/limit, BATCH/batch and learning_rate/lr from the map.
     *  Returns false (and logs) if a value has the wrong runtime type. */
    override def config(paras: Map[String, Any]): Boolean = try {
        limit = paras.getOrElse("LIMIT", paras.getOrElse("limit", 1000)).asInstanceOf[Int]
        batch = paras.getOrElse("BATCH", paras.getOrElse("batch", 10)).asInstanceOf[Int]
        lr = paras.getOrElse("learning_rate", paras.getOrElse("lr", 0.01)).asInstanceOf[Double]
        true
    } catch { case e: Exception =>
        Console.err.println(e)
        false
    }

    // --- Train with mini-batch stochastic gradient descent ---
    override def train(
        data: Array[(Double, Array[Double])] // Data Array(yi, xi)
    ): Boolean = try {
        val dataSize = data.size
        val y = data.map(_._1)
        val x = data.map(_._2 :+ 1.0) // append constant 1.0 so the last weight acts as the bias

        for (i <- 0 until limit) {
            // BUGFIX: the batch window must wrap around the number of SAMPLES
            // (dataSize), not the number of features (was `% xSize`).
            val cut1 = (i * batch) % dataSize
            val cut2 = cut1 + batch // slice clips cut2 to dataSize, so a short tail batch is fine
            weights = gradientDescent(
                x.slice(cut1, cut2),
                y.slice(cut1, cut2),
                lr, 1, weights // one descent step per batch, warm-started from current weights
            )
        }
        true
    } catch { case e: Exception =>
        Console.err.println(e)
        false
    }

    // --- Predict: dot product of (features :+ 1.0) with the learned weights ---
    override def predict(
        data: Array[Array[Double]]
    ): Array[Double] =
        data.map { d =>
            (d :+ 1.0).zip(weights).map { case (xi, w) => w * xi }.sum
        }
}
17 changes: 11 additions & 6 deletions src/main/scala/general/MatrixFunc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,22 @@ package object MatrixFunc {
m
}

def gradientDescent(x: Array[Array[Double]], y: Array[Double],
alpha: Double, limit: Int): Array[Double] = {
/** Batch gradient descent for a linear model.
 *
 * @param x     design matrix, one row per sample (bias column appended by the caller)
 * @param y     target values, one per sample
 * @param alpha learning rate
 * @param limit number of full-batch update iterations
 * @param initW optional starting weights; null or empty starts from all zeros
 * @return the learned weight vector (one entry per column of x)
 */
def gradientDescent(
    x: Array[Array[Double]],
    y: Array[Double],
    alpha: Double,
    limit: Int,
    initW: Array[Double] = null
): Array[Double] = {
    val xSize = x.head.size
    val ySize = y.size
    // BUGFIX: work on a copy of initW — the original aliased and mutated the
    // caller's array in place, a surprising side effect. Return value is unchanged.
    val w =
        if (initW == null || initW.size == 0) new Array[Double](xSize)
        else initW.clone
    for (i <- 0 until limit) {
        // residuals of the current model on every sample
        val diff = arrayminus(x.map(xi => arraymultiply(xi, w).sum), y)
        // val cost = diff.map(d => Math.pow(d, 2)).sum / ySize / 2
        // gradient of the squared-error cost, summed over samples
        val gradientSum = matrixaccumulate(x.zip(diff).map { case (xi, d) => xi.map(_ * d) })
        for (j <- 0 until xSize) w(j) -= gradientSum(j) / ySize * alpha
    }
    w
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@ class MultivariateLinearRegressionSuite extends AnyFunSuite {
assert(mlr.config(Map[String, Double]()))
assert(mlr.train(LABELED_LINEAR_DATA.map(yx => yx._1.toDouble -> yx._2)))
val result = mlr.predict(UNLABELED_LINEAR_DATA)
assert(arraysimilar(result, LABEL_LINEAR_DATA.map(_.toDouble), 0.9))
val nResult = result.map(v => if (v > 0) 1.0 else -1.0)
assert(arraysimilar(nResult, LABEL_LINEAR_DATA.map(_.toDouble), 0.9))
}

test("MultivariateLinearRegression Test : Nonlinear Data - WRONG") {
// Negative test: a linear model cannot fit nonlinear data, so the
// predictions should NOT match the labels even at a loose 0.45 threshold.
assert(mlr.clear())
assert(mlr.config(Map[String, Double]()))
assert(mlr.train(LABELED_NONLINEAR_DATA.map(yx => yx._1.toDouble -> yx._2)))
val result = mlr.predict(UNLABELED_NONLINEAR_DATA)
assert(!arraysimilar(result, LABEL_NONLINEAR_DATA.map(_.toDouble), 0.45))
}

test("MultivariateLinearRegression Test : Invalid Config & Data") {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Wei Chen - Stochastic Gradient Descent Test
// 2020-03-08

import com.scalaml.TestData._
import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm.StochasticGradientDecent
import org.scalatest.funsuite.AnyFunSuite

// Unit tests for the StochasticGradientDecent regressor.
class StochasticGradientDecentSuite extends AnyFunSuite {

    // Model under test, shared by all cases; each case resets it via clear().
    val sgd = new StochasticGradientDecent()

    test("StochasticGradientDecent Test : Clear") {
        assert(sgd.clear())
    }

    test("StochasticGradientDecent Test : Linear Data") {
        assert(sgd.clear())
        assert(sgd.config(Map[String, Double]()))
        val trainSet = LABELED_LINEAR_DATA.map { case (label, features) => label.toDouble -> features }
        assert(sgd.train(trainSet))
        // Threshold the raw regression outputs at zero to recover class labels.
        val predicted = sgd.predict(UNLABELED_LINEAR_DATA).map(v => if (v > 0) 1.0 else -1.0)
        assert(arraysimilar(predicted, LABEL_LINEAR_DATA.map(_.toDouble), 0.9))
    }

    test("StochasticGradientDecent Test : Nonlinear Data - WRONG") {
        assert(sgd.clear())
        assert(sgd.config(Map[String, Double]()))
        val trainSet = LABELED_NONLINEAR_DATA.map { case (label, features) => label.toDouble -> features }
        assert(sgd.train(trainSet))
        // A linear model should NOT fit nonlinear data, even at a loose threshold.
        val predicted = sgd.predict(UNLABELED_NONLINEAR_DATA)
        assert(!arraysimilar(predicted, LABEL_NONLINEAR_DATA.map(_.toDouble), 0.45))
    }

    test("StochasticGradientDecent Test : Invalid Config & Data") {
        assert(sgd.clear())
        assert(!sgd.config(Map("limit" -> "test")))
        assert(!sgd.train(Array((1, Array(1, 2)), (1, Array()))))
    }
}

0 comments on commit 1b467e6

Please sign in to comment.