Skip to content

Commit

Permalink
UpperConfidenceBound
Browse files Browse the repository at this point in the history
  • Loading branch information
Wei-1 committed Mar 8, 2020
1 parent 92b681a commit 4fa3bd3
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ A very light weight Scala machine learning library that provide some basic ML al

- [x] Epsilon Greedy Search [[Code]](src/main/scala/algorithm/optimization/EpsilonGreedy.scala) [[Usage]](src/test/scala/algorithm/optimization/EpsilonGreedyTest.scala)

- [x] Upper Confidence Bound [[Code]](src/main/scala/algorithm/optimization/UpperConfidenceBound.scala) [[Usage]](src/test/scala/algorithm/optimization/UpperConfidenceBoundTest.scala)

### Reinforcement Learning :

- [x] Naive Feedback [[Code]](src/main/scala/algorithm/reinforcement/NaiveFeedback.scala) [[Usage]](src/test/scala/algorithm/reinforcement/NaiveFeedbackTest.scala)
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/algorithm/optimization/EpsilonGreedy.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class EpsilonGreedy {
if (scores != null)
currentScores = scores
if (currentScores == null)
currentScores = new Array[Double](size)
currentScores = Array.fill[Double](size)(Double.MinValue)
if (math.random < epsilon) {
val randSelect = (math.random * size).toInt
val value = evaluation(choices(randSelect))
Expand Down
39 changes: 39 additions & 0 deletions src/main/scala/algorithm/optimization/UpperConfidenceBound.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Wei Chen - Upper Confidence Bound
// 2020-03-08

package com.scalaml.algorithm

/** Upper Confidence Bound (UCB1-style) search over a fixed list of choices.
  *
  * Every choice ("arm") carries a running mean of the values observed for it
  * and the number of times it has been evaluated. Each call to [[search]]
  * evaluates exactly one arm: the one maximizing
  * `mean + c * sqrt(ln(total + 1) / count)`, where `total` is the number of
  * evaluations made across all arms.
  */
class UpperConfidenceBound {

  /** Per-choice statistics as `(mean value, evaluation count)`.
    * Stays `null` until [[search]] initializes it or a caller assigns it.
    */
  var currentStats: Array[(Double, Int)] = null

  /** Picks the index of the arm with the highest upper confidence bound.
    *
    * @param c exploration weight; larger values favor rarely evaluated arms
    * @return index into `currentStats` of the arm to evaluate next (the first
    *         one on ties)
    * @throws IllegalArgumentException if `currentStats` is null or empty
    */
  def select(c: Double): Int = {
    require(
      currentStats != null && currentStats.nonEmpty,
      "currentStats must be initialized and non-empty before select"
    )
    // UCB1 uses the total number of evaluations over all arms in the log term,
    // not the number of distinct arms tried; otherwise the exploration bonus
    // stops growing and under-sampled arms are never revisited.
    val totalCount = currentStats.iterator.map(_._2.toLong).sum
    val logTotal = math.log(totalCount + 1.0)
    val currentScores = currentStats.map { case (mean, count) =>
      // The tiny epsilon keeps the division finite for unevaluated arms while
      // still giving them an enormous bonus once anything has been evaluated.
      mean + c * math.sqrt(logTotal / (count + 1e-12))
    }
    currentScores.indexOf(currentScores.max)
  }

  /** Folds one observed value into arm `i`'s running mean and count.
    *
    * @param i     index of the arm that was evaluated
    * @param value value returned by the evaluation
    */
  def add(i: Int, value: Double): Unit = {
    val (currentValue, currentCount) = currentStats(i)
    val newCount = currentCount + 1
    val newValue = (currentValue * currentCount + value) / newCount
    currentStats(i) = (newValue, newCount)
  }

  /** Runs one UCB step and returns the best choice seen so far.
    *
    * @param evaluation scoring function; higher is better
    * @param choices    candidate inputs, one per arm
    * @param scores     optional statistics to continue from; when given, this
    *                   array becomes `currentStats` and is updated in place
    * @param c          exploration weight passed to [[select]]
    * @return the evaluated choice with the highest mean value
    * @throws IllegalArgumentException if `choices` is empty or its length does
    *                                  not match the statistics in use
    */
  def search(
    evaluation: Array[Double] => Double,
    choices: Array[Array[Double]],
    scores: Array[(Double, Int)] = null,
    c: Double = 1
  ): Array[Double] = {
    val size = choices.length
    require(size > 0, "choices must not be empty")
    if (scores != null)
      currentStats = scores
    if (currentStats == null)
      currentStats = Array.fill[(Double, Int)](size)((0.0, 0))
    require(
      currentStats.length == size,
      s"statistics length ${currentStats.length} does not match choices length $size"
    )
    val currentSelect = select(c)
    val value = evaluation(choices(currentSelect))
    add(currentSelect, value)
    // Rank only arms that have actually been evaluated: the placeholder mean
    // of an untouched arm (0) would otherwise beat every negative observation.
    // At least one arm qualifies because `add` has just incremented a count.
    val best = currentStats.indices
      .filter(i => currentStats(i)._2 > 0)
      .maxBy(i => currentStats(i)._1)
    choices(best)
  }
}
6 changes: 3 additions & 3 deletions src/test/scala/algorithm/optimization/EpsilonGreedyTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ class EpsilonGreedySuite extends AnyFunSuite {
)
val epsilon: Double = 0.1

test("GeneAlgorithm Test : Initial") {
test("EpsilonGreedy Test : Initial") {
assert(eg.currentScores == null)
}

test("GeneAlgorithm Test : Search - Start") {
test("EpsilonGreedy Test : Search - Start") {
for (i <- 0 until 1000)
eg.search(evaluation, choices, null, epsilon)
assert(eg.currentScores.size == choices.size)
Expand All @@ -33,7 +33,7 @@ class EpsilonGreedySuite extends AnyFunSuite {
assert((best.head - 0.7).abs < 0.05)
}

test("GeneAlgorithm Test : Search - Continue") {
test("EpsilonGreedy Test : Search - Continue") {
var scores: Array[Double] = Array(0, 0, 1 / 1.3, 0)
for (i <- 0 until 1000) {
eg.search(evaluation, choices, scores, epsilon)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Wei Chen - Upper Confidence Bound Test
// 2020-03-08

import com.scalaml.general.MatrixFunc._
import com.scalaml.algorithm.UpperConfidenceBound
import org.scalatest.funsuite.AnyFunSuite

/** ScalaTest suite for UpperConfidenceBound: checks the initial state, a
  * search started from scratch, and a search continued from given statistics.
  */
class UpperConfidenceBoundSuite extends AnyFunSuite {

  // One optimizer instance is shared by every test in this suite.
  val ucb = new UpperConfidenceBound()

  // Score is 1 when the first coordinate equals 0.7 and shrinks as the
  // coordinate moves away from it.
  def evaluation(arr: Array[Double]): Double = {
    val distance = math.abs(arr.head - 0.7)
    1 / (distance + 1)
  }

  // Four single-coordinate candidates; 0.7 is the one with the top score.
  val choices: Array[Array[Double]] =
    Array(0.7, 0.8, 1.0, 0.5).map(x => Array(x))
  val c: Double = 1

  test("UpperConfidenceBound Test : Initial") {
    assert(ucb.currentStats == null)
  }

  test("UpperConfidenceBound Test : Search - Start") {
    (0 until 100).foreach { _ =>
      ucb.search(evaluation, choices, null, c)
    }
    assert(ucb.currentStats.size == choices.size)

    val best = ucb.search(evaluation, choices, null, c)
    val error = (best.head - 0.7).abs
    assert(error < 0.05)
  }

  test("UpperConfidenceBound Test : Search - Continue") {
    // Start from statistics where only the third choice has been evaluated.
    var stats: Array[(Double, Int)] = Array(
      (0.0, 0),
      (0.0, 0),
      (1 / 1.3, 1),
      (0.0, 0)
    )
    (0 until 100).foreach { _ =>
      ucb.search(evaluation, choices, stats, c)
      // Feed the optimizer's statistics back in on the next round.
      stats = ucb.currentStats
    }
    assert(ucb.currentStats.size == stats.size)

    val best = ucb.search(evaluation, choices, stats, c)
    val error = (best.head - 0.7).abs
    assert(error < 0.05)
  }

}

0 comments on commit 4fa3bd3

Please sign in to comment.