/** Upper Confidence Bound (UCB1-style) search over a fixed set of choices.
  *
  * Each choice is treated as an arm whose running mean reward and pull count
  * are kept in [[currentStats]]. Every call to [[search]] pulls exactly one
  * arm (the one with the highest upper confidence bound), records the
  * observed reward and returns the choice with the best mean so far.
  *
  * In the originating project this class is declared in
  * `package com.scalaml.algorithm`.
  */
class UpperConfidenceBound {

    /** Per-choice statistics as `(running mean reward, pull count)`.
      *
      * Stays `null` until the first [[search]] call initialises it or a
      * caller assigns it. When `search` is given a `scores` array, that
      * array is stored here by reference and updated in place.
      */
    var currentStats: Array[(Double, Int)] = null

    /** Index of the arm with the highest upper confidence bound.
      *
      * The bound of an arm is `mean + c * sqrt(ln(N + 1) / pulls)`, where
      * `N` is the total number of pulls over all arms. Ties resolve to the
      * lowest index.
      *
      * @param c exploration weight; larger values favour rarely pulled arms
      * @return index into [[currentStats]] of the arm to pull next
      */
    def select(c: Double): Int = {
        // N must be the total number of pulls, not the number of distinct
        // arms tried, otherwise the exploration bonus stops growing once
        // every arm has been sampled. Summed as Long to avoid Int overflow.
        val totalPulls = currentStats.iterator.map(_._2.toLong).sum
        val logTerm = math.log(totalPulls + 1.0)
        val bounds = currentStats.map { case (mean, pulls) =>
            // The tiny epsilon avoids division by zero; as soon as at least
            // one pull exists, any unpulled arm gets a huge bonus (if c > 0).
            mean + c * math.sqrt(logTerm / (pulls + 1e-12))
        }
        bounds.indexOf(bounds.max)
    }

    /** Folds one observed reward into the running mean of arm `i`.
      *
      * @param i     index of the arm that was pulled
      * @param value reward observed for that pull
      */
    def add(i: Int, value: Double): Unit = {
        val (mean, pulls) = currentStats(i)
        val updatedMean = (mean * pulls + value) / (pulls + 1)
        currentStats(i) = (updatedMean, pulls + 1)
    }

    /** Performs one UCB step and returns the best choice found so far.
      *
      * @param evaluation reward function applied to the selected choice
      * @param choices    candidate parameter vectors, one per arm
      * @param scores     optional statistics to continue from; when non-null
      *                   it replaces [[currentStats]] (stored by reference)
      * @param c          exploration weight passed to [[select]]
      * @return the choice whose arm has the highest mean reward among arms
      *         that have been pulled at least once
      * @throws IllegalArgumentException if `choices` is empty or its length
      *                                  differs from the statistics array
      */
    def search(
        evaluation: Array[Double] => Double,
        choices: Array[Array[Double]],
        scores: Array[(Double, Int)] = null,
        c: Double = 1
    ): Array[Double] = {
        require(choices.nonEmpty, "choices must not be empty")
        if (scores != null)
            currentStats = scores
        if (currentStats == null)
            currentStats = Array.fill[(Double, Int)](choices.length)((0.0, 0))
        require(
            currentStats.length == choices.length,
            s"stats size ${currentStats.length} does not match choices size ${choices.length}"
        )

        val picked = select(c)
        add(picked, evaluation(choices(picked)))

        // Only arms that were actually pulled carry a meaningful mean; an
        // unpulled arm's placeholder mean of 0 must not beat real negative
        // rewards. Fall back to all arms if the counts are degenerate.
        val pulled = currentStats.indices.filter(i => currentStats(i)._2 > 0)
        val candidates = if (pulled.nonEmpty) pulled else currentStats.indices
        choices(candidates.maxBy(i => currentStats(i)._1))
    }
}
// Wei Chen - Upper Confidence Bound Test
// 2020-03-08

import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm.UpperConfidenceBound
import org.scalatest.funsuite.AnyFunSuite

/** Exercises UpperConfidenceBound on a four-arm problem whose reward peaks at 0.7. */
class UpperConfidenceBoundSuite extends AnyFunSuite {

    // One searcher instance is shared by all tests below.
    val ucb = new UpperConfidenceBound()

    /** Reward is 1 at 0.7 and decreases with the distance from 0.7. */
    def evaluation(arr: Array[Double]): Double = {
        val distance = (arr.head - 0.7).abs
        1 / (distance + 1)
    }

    val choices: Array[Array[Double]] =
        Array(0.7, 0.8, 1.0, 0.5).map(v => Array(v))

    val c: Double = 1

    test("UpperConfidenceBound Test : Initial") {
        assert(ucb.currentStats == null)
    }

    test("UpperConfidenceBound Test : Search - Start") {
        (0 until 100).foreach { _ =>
            ucb.search(evaluation, choices, null, c)
        }
        assert(ucb.currentStats.size == choices.size)

        val winner = ucb.search(evaluation, choices, null, c)
        assert((winner.head - 0.7).abs < 0.05)
    }

    test("UpperConfidenceBound Test : Search - Continue") {
        // Start from statistics where only the third arm has been pulled once.
        val seed: Array[(Double, Int)] = Array(
            (0, 0),
            (0, 0),
            (1 / 1.3, 1),
            (0, 0)
        )
        // Thread the searcher's statistics from one step into the next.
        val stats = (0 until 100).foldLeft(seed) { (carried, _) =>
            ucb.search(evaluation, choices, carried, c)
            ucb.currentStats
        }
        assert(ucb.currentStats.size == stats.size)

        val winner = ucb.search(evaluation, choices, stats, c)
        assert((winner.head - 0.7).abs < 0.05)
    }

}