-
-
Notifications
You must be signed in to change notification settings - Fork 476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Neural networks perceptron algorithm #182
Open
mzarnecki
wants to merge
3
commits into
TheAlgorithms:master
Choose a base branch
from
mzarnecki:neural-networks-perceptron-algorithm
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
192 changes: 192 additions & 0 deletions
192
NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
<?php

declare(strict_types=1);

namespace NeuralNetworks\PerceptronClassifier;

/**
 * This class implements a simple neural network with a single output neuron.
 * The network uses the sigmoid activation function and performs binary
 * classification, trained with batch gradient descent on the binary
 * cross-entropy (log loss) cost.
 * (https://cw.fel.cvut.cz/b211/courses/be5b33rpz/labs/07_perceptron/start)
 *
 * Data layout: $X is an (n features) x (m samples) matrix, i.e. $X[$i][$j]
 * is feature $i of sample $j; $Y is a length-m array of 0/1 labels.
 *
 * @author Michał Żarnecki https://github.com/rzarno
 */
class NeuralNetworkPerceptronClassifier
{
    // Clip bound keeping sigmoid outputs away from exactly 0/1 so that the
    // cross-entropy cost never evaluates log(0) = -INF when the sigmoid
    // saturates in floating point.
    private const EPSILON = 1e-15;

    /**
     * Train weights and bias with batch gradient descent.
     *
     * @param array $X Feature matrix of shape (n x m)
     * @param array $Y Binary labels (0 or 1), length m
     * @param int $iterations Number of gradient-descent steps
     * @param float $learningRate Step size for each update
     * @return array [$W, $b] learned weights (length n) and bias
     */
    public function trainModel(array $X, array $Y, int $iterations, float $learningRate): array
    {
        // count($X) is the number of feature rows (n).
        [$W, $b] = $this->initParams(count($X));

        for ($i = 0; $i < $iterations; $i++) {
            // Forward propagation: predicted probabilities for all samples.
            $A = $this->forwardPropagation($X, $W, $b);

            // Compute cost (binary cross-entropy).
            $cost = $this->computeCost($A, $Y);

            // Backward propagation: gradients of the cost w.r.t. W and b.
            [$dW, $db] = $this->backwardPropagation($A, $X, $Y);

            // Gradient-descent parameter update.
            [$W, $b] = $this->updateParams($W, $b, $dW, $db, $learningRate);

            // Progress report every 100 iterations.
            if ($i % 100 == 0) {
                echo "Iteration {$i} - Cost: {$cost}\n";
            }
        }

        return [$W, $b];
    }

    /**
     * Classify samples: probability > 0.5 maps to class 1, otherwise 0.
     *
     * @param array $X Feature matrix of shape (n x m)
     * @param array $W Weights, length n
     * @param float $b Bias
     * @return array Length-m array of 0/1 predictions
     */
    public function predict(array $X, array $W, float $b): array
    {
        $A = $this->forwardPropagation($X, $W, $b);
        return array_map(fn($a) => $a > 0.5 ? 1 : 0, $A);
    }

    /**
     * Stage 1. Prepare dataset: m random binary samples labelled 1 exactly
     * when x1 == 1 and x2 == 0 (a linearly separable problem).
     *
     * @return array[] [$X, $Y] with $X of shape (2 x m) and $Y of length m
     */
    public function generateTrainingSet(): array
    {
        $m = 50;

        // Generate a 2 x m matrix with binary values (0 or 1).
        $X = [];
        for ($i = 0; $i < 2; $i++) {
            for ($j = 0; $j < $m; $j++) {
                $X[$i][$j] = rand(0, 1);
            }
        }

        // Compute Y: label is 1 exactly when X[0] == 1 and X[1] == 0.
        $Y = [];
        for ($j = 0; $j < $m; $j++) {
            $Y[$j] = ($X[0][$j] == 1 && $X[1][$j] == 0) ? 1 : 0;
        }

        return [$X, $Y];
    }

    /**
     * Stage 2. Initialize model parameters.
     *
     * @param int $n Number of features
     * @return array [$W, $b] Weight array (uniform random in [0, 1]) and zero bias
     */
    private function initParams(int $n): array
    {
        $W = [];
        for ($i = 0; $i < $n; $i++) {
            // Uniform random weight in [0, 1].
            $W[$i] = mt_rand() / mt_getrandmax();
        }
        $b = 0.0; // Bias initialized to zero.
        return [$W, $b];
    }

    /**
     * Sigmoid activation function: 1 / (1 + e^-z).
     *
     * @param float $z
     * @return float Value in (0, 1)
     */
    private function sigmoid(float $z): float
    {
        return 1 / (1 + exp(-$z));
    }

    /**
     * Stage 3. Forward propagation: A = sigmoid(W·X + b) per sample.
     *
     * @param array $X Feature matrix (n x m)
     * @param array $W Weights, length n
     * @param float $b Bias
     * @return array Length-m array of activations in (0, 1)
     */
    private function forwardPropagation(array $X, array $W, float $b): array
    {
        $Z = [];
        for ($j = 0; $j < count($X[0]); $j++) {
            $sum = $b;
            for ($i = 0; $i < count($W); $i++) {
                $sum += $W[$i] * $X[$i][$j];
            }
            $Z[$j] = $this->sigmoid($sum);
        }
        return $Z;
    }

    /**
     * Stage 4. Compute cost function (binary cross-entropy loss).
     *
     * @param array $A Activations, length m
     * @param array $Y Labels, length m
     * @return float Mean loss (0.0 for an empty dataset)
     */
    private function computeCost(array $A, array $Y): float
    {
        $m = count($Y);
        if ($m === 0) {
            return 0.0; // No samples: avoid division by zero.
        }
        $cost = 0.0;
        for ($i = 0; $i < $m; $i++) {
            // Clip activations away from exactly 0/1; a saturated sigmoid can
            // round to 0.0 or 1.0 in floating point, making log() return -INF.
            $a = min(max($A[$i], self::EPSILON), 1 - self::EPSILON);
            $cost += -($Y[$i] * log($a) + (1 - $Y[$i]) * log(1 - $a));
        }
        return $cost / $m;
    }

    /**
     * Stage 5. Backward propagation: average gradients of the cross-entropy
     * cost, dW = (1/m) (A - Y) X^T and db = (1/m) sum(A - Y).
     *
     * @param array $A Activations, length m
     * @param array $X Feature matrix (n x m)
     * @param array $Y Labels, length m
     * @return array [$dW, $db]
     */
    private function backwardPropagation(array $A, array $X, array $Y): array
    {
        $m = count($Y);
        $dW = array_fill(0, count($X), 0.0);
        $db = 0.0;

        for ($j = 0; $j < $m; $j++) {
            $dZ = $A[$j] - $Y[$j];
            for ($i = 0; $i < count($X); $i++) {
                $dW[$i] += $dZ * $X[$i][$j];
            }
            $db += $dZ;
        }

        // Average gradients over the m samples.
        for ($i = 0; $i < count($dW); $i++) {
            $dW[$i] /= $m;
        }
        $db /= $m;

        return [$dW, $db];
    }

    /**
     * Stage 6. Update parameters by one gradient-descent step.
     *
     * @param array $W Current weights
     * @param float $b Current bias
     * @param array $dW Weight gradients
     * @param float $db Bias gradient
     * @param float $learningRate Step size
     * @return array [$W, $b] updated parameters
     */
    private function updateParams(array $W, float $b, array $dW, float $db, float $learningRate): array
    {
        for ($i = 0; $i < count($W); $i++) {
            $W[$i] -= $learningRate * $dW[$i];
        }
        $b -= $learningRate * $db;

        return [$W, $b];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
## Maths behind the single Perceptron Neural Network with Activation Function | ||
|
||
This work is based on examples from course https://www.coursera.org/learn/machine-learning-calculus prepared by author Luis Serrano. | ||
|
||
Linear separation refers to data points in binary classification problems that can be separated by a linear decision boundary. | ||
If the data points can be separated by a line, linear function, or flat hyperplane, they are said to be linearly separable. | ||
|
||
Points in an n-dimensional space are said to be linearly separable if the two classes can be separated by a hyperplane defined by | ||
|
||
$$w_1x_1 + w_2x_2 + \ldots + w_nx_n + b = 0$$ | ||
|
||
For two-dimensional input data, if there is a line, whose equation is $$w_1x_1 + w_2x_2 + b = 0$$ | ||
|
||
that separates all samples of one class from the other, then the class of any observation can be determined from which side of the line it lies on. | ||
Such classification problems are called "linearly separable", i.e. separating by linear combination. | ||
|
||
<img src="chart/linear-separated.png" /> | ||
|
||
The input layer contains two nodes $x_1$ and $x_2$. Weight vector $W = \begin{bmatrix} w_1 & w_2\end{bmatrix}$ and bias ($b$) are the parameters to be updated during the model training. | ||
|
||
$$z^{(i)} = w_1x_1^{(i)} + w_2x_2^{(i)} + b = Wx^{(i)} + b.\tag{1}$$ | ||
|
||
To be able to perform classification we need a nonlinear approach. This can be achieved with the sigmoid activation function, which maps most inputs to values near 0 or near 1, with intermediate outputs only for a small range of inputs around 0. | ||
|
||
$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise } \end{cases}\tag{10}$$ | ||
|
||
Sigmoid activation function is defined as | ||
|
||
$$a = \sigma\left(z\right) = \frac{1}{1+e^{-z}}.\tag{2}$$ | ||
|
||
<img src="chart/sigmoid.png" /> | ||
|
||
Threshold value of $0.5$ can be used for predictions: $1$ (red) if $a > 0.5$ and $0$ (blue) otherwise. | ||
|
||
The single perceptron neural network with sigmoid activation function can be expressed as: | ||
|
||
\begin{align} | ||
z^{(i)} &= W x^{(i)} + b,\\ | ||
a^{(i)} &= \sigma\left(z^{(i)}\right).\\\tag{3} | ||
\end{align} | ||
|
||
|
||
With $m$ training examples organised in the columns of ($2 \times m$) matrix $X$, you can apply the activation function element-wise. So the model can be written as: | ||
|
||
|
||
\begin {align} | ||
Z &= W X + b,\\ | ||
A &= \sigma\left(Z\right),\\\tag{4} | ||
\end{align} | ||
|
||
When dealing with classification problems, the most commonly used cost function is the **log loss**, which is described by the following equation | ||
|
||
$$\mathcal{L}\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} L\left(W, b\right) = \frac{1}{m}\sum_{i=1}^{m} \large\left(\small -y^{(i)}\log\left(a^{(i)}\right) - (1-y^{(i)})\log\left(1- a^{(i)}\right) \large \right) \small,\tag{5}$$ | ||
|
||
where $y^{(i)} \in \{0,1\}$ are the original labels and $a^{(i)}$ are the continuous output values of the forward propagation step (elements of array $A$). | ||
|
||
|
||
We want to minimize the cost function during the training. To implement gradient descent, calculate partial derivatives using chain rule | ||
|
||
|
||
\begin{align} | ||
\frac{\partial \mathcal{L} }{ \partial w_1 } &= | ||
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_1^{(i)},\\ | ||
\frac{\partial \mathcal{L} }{ \partial w_2 } &= | ||
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right)x_2^{(i)},\tag{7}\\ | ||
\frac{\partial \mathcal{L} }{ \partial b } &= | ||
\frac{1}{m}\sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right). | ||
\end{align} | ||
|
||
Equations above can be rewritten in a matrix form | ||
|
||
|
||
\begin{align} | ||
\frac{\partial \mathcal{L} }{ \partial W } &= | ||
\begin{bmatrix} \frac{\partial \mathcal{L} }{ \partial w_1 } & | ||
\frac{\partial \mathcal{L} }{ \partial w_2 }\end{bmatrix} = \frac{1}{m}\left(A - Y\right)X^T,\\ | ||
\frac{\partial \mathcal{L} }{ \partial b } &= \frac{1}{m}\left(A - Y\right)\mathbf{1}. | ||
\tag{8} | ||
\end{align} | ||
|
||
where $\left(A - Y\right)$ is an array of a shape ($1 \times m$), $X^T$ is an array of a shape ($m \times 2$) and $\mathbf{1}$ is just a ($m \times 1$) vector of ones. | ||
|
||
Then you can update the parameters: | ||
|
||
\begin{align} | ||
W &= W - \alpha \frac{\partial \mathcal{L} }{ \partial W },\\ | ||
b &= b - \alpha \frac{\partial \mathcal{L} }{ \partial b }, | ||
\tag{9}\end{align} | ||
|
||
where $\alpha$ is the learning rate. Repeat the process in a loop until the cost function stops decreasing. | ||
|
||
In the last step, apply the threshold to the activation to obtain the prediction: | ||
$$\hat{y} = \begin{cases} 1 & \mbox{if } a > 0.5 \\ 0 & \mbox{otherwise } \end{cases}\tag{10}$$ | ||
|
||
|
||
### Dataset | ||
|
||
As a dataset we will generate $m=50$ data points $(x_1, x_2)$, where $x_1, x_2 \in \{0,1\}$ and save them in the array `X` of a shape $(2 \times m)$. The labels ($0$: blue, $1$: red) will be calculated so that $y = 1$ if $x_1 = 1$ and $x_2 = 0$, in the rest of the cases $y=0$. The labels will be saved in the array `Y` of a shape $(1 \times m)$. | ||
|
||
<img src="chart/dataset.png" /> |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions
23
tests/NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifierTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?php

declare(strict_types=1);

namespace NeuralNetworks\PerceptronClassifier;

require_once __DIR__ . '/../../../vendor/autoload.php';
require_once __DIR__ . '/../../../NeuralNetworks/PerceptronClassifier/NeuralNetworkPerceptronClassifier.php';

use PHPUnit\Framework\TestCase;

class NeuralNetworkPerceptronClassifierTest extends TestCase
{
    /**
     * Trains the perceptron on the generated "x1 AND NOT x2" dataset and
     * verifies that it classifies all four binary input combinations
     * correctly; only (x1 = 1, x2 = 0) belongs to the positive class.
     */
    public function testNeuralNetworkPerceptronClassification(): void
    {
        $nnClassifier = new NeuralNetworkPerceptronClassifier();
        [$X, $Y] = $nnClassifier->generateTrainingSet();

        // Train the model.
        [$W, $b] = $nnClassifier->trainModel($X, $Y, 1000, 0.1);

        // Make predictions on every combination of binary inputs.
        $predictions = $nnClassifier->predict([[0, 0, 1, 1], [0, 1, 1, 0]], $W, $b);

        // assertSame enforces strict (===) comparison, so element types
        // (ints, not loosely-equal strings/floats) are checked as well.
        $this->assertSame([0, 0, 0, 1], $predictions);
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for adding this additional documentation on PerceptronClassifier