merged dev into master
ngillian-google committed Dec 4, 2016
2 parents 1e5f686 + 55ce59b commit b0f491e
Showing 254 changed files with 7,443 additions and 4,120 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
@@ -7,8 +7,8 @@ compiler:
- clang
- g++
env:
- CONFIG=Release
- CONFIG=Debug
- CONFIG=Release
- CONFIG=Debug
before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ; fi
@@ -30,4 +30,4 @@ script:
branches:
only:
- master
- dev
- dev
18 changes: 18 additions & 0 deletions CHANGELOG.md
@@ -2,8 +2,26 @@

This file contains some notes about significant changes to the GRT.

# Version 0.2.4

## 4th December 2016
- **Better unit tests:** improved unit tests for MLP neural nets and several GRT classifiers
- **Added Metrics class:** the Metrics class will act as the main backend for computing the various ML metrics needed for all GRT classification and regression modules
- **Added Training and Validation metrics:** all GRT classifiers now automatically compute training accuracy, and validation accuracy if a validation set is used, when training the classification model. These metrics can be accessed via classifier.getTrainingSetAccuracy() and classifier.getValidationSetAccuracy()
- **Improved GMM and GaussianMixtureModels algorithms:** improved both the GMM and GaussianMixtureModels algorithms by adding a restart option; this allows the learning algorithm to reattempt training if the previous attempt fails to converge
- **Added classifier unit test helper function:** added a base class for computing common unit tests for GRT classification modules
- **Updated GRT GUI:** updated GRT GUI to support latest neuron API

# Version 0.2.3

## 24th October 2016
- **C++11:** moved to using C++11 as default

## 22nd October 2016
- **backend updates:** set all classification, regression, cluster, preprocessing, and postprocessing modules to use new ID and base class functionality
- **unit tests:** working to improve and extend all GRT unit tests
- **removed LDA:** removed the old LDA algorithm, as it was never fully implemented; it will be added back once complete

## 29th September 2016
- **added RMSFilter class:** new PreProcessing filter class for root-mean-squared low-pass filtering of data
- **moved various PreProcessing init to PreProcessing base class:** moved several common initialization tasks, such as module naming and log setup, to the PreProcessing base class
61 changes: 51 additions & 10 deletions GRT/ClassificationModules/ANBC/ANBC.cpp
@@ -23,8 +23,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

GRT_BEGIN_NAMESPACE

-//Define the string that will be used to indentify the object
-std::string ANBC::id = "ANBC";
+//Define the string that will be used to identify the object
+const std::string ANBC::id = "ANBC";
std::string ANBC::getId() { return ANBC::id; }

//Register the ANBC module with the Classifier base class
@@ -67,9 +67,10 @@ bool ANBC::deepCopyFrom(const Classifier *classifier){

if( classifier == NULL ) return false;

-if( this->getClassifierType() == classifier->getClassifierType() ){
+if( this->getId() == classifier->getId() ){

-ANBC *ptr = (ANBC*)classifier;
+const ANBC *ptr = dynamic_cast<const ANBC*>(classifier);

//Clone the ANBC values
this->weightsDataSet = ptr->weightsDataSet;
this->weightsData = ptr->weightsData;
@@ -86,11 +87,10 @@ bool ANBC::train_(ClassificationData &trainingData){
//Clear any previous model
clear();

-const unsigned int M = trainingData.getNumSamples();
const unsigned int N = trainingData.getNumDimensions();
const unsigned int K = trainingData.getNumClasses();

-if( M == 0 ){
+if( trainingData.getNumSamples() == 0 ){
errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
return false;
}
@@ -103,16 +103,25 @@
}

numInputDimensions = N;
numOutputDimensions = K;
numClasses = K;
models.resize(K);
classLabels.resize(K);
ranges = trainingData.getRanges();
ClassificationData validationData;

//Scale the training data if needed
if( useScaling ){
//Scale the training data between 0 and 1
trainingData.scale(0, 1);
}

if( useValidationSet ){
validationData = trainingData.split( 100-validationSetSize );
}

const UINT M = trainingData.getNumSamples();
trainingLog << "Training Naive Bayes model, num training examples: " << M << ", num validation examples: " << validationData.getNumSamples() << ", num classes: " << numClasses << std::endl;

//Train each of the models
for(UINT k=0; k<numClasses; k++){
Expand Down Expand Up @@ -185,8 +194,40 @@ bool ANBC::train_(ClassificationData &trainingData){
nullRejectionThresholds[k] = models[k].threshold;
}

-//Flag that the models have been trained
+//Flag that the model has been trained
trained = true;

//Compute the final training stats
trainingSetAccuracy = 0;
validationSetAccuracy = 0;

//If scaling was on, then the data will already be scaled, so turn it off temporarily so we can test the model accuracy
bool scalingState = useScaling;
useScaling = false;
if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
trained = false;
errorLog << "Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
return false;
}

if( useValidationSet ){
if( !computeAccuracy( validationData, validationSetAccuracy ) ){
trained = false;
errorLog << "Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
return false;
}

}

trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

if( useValidationSet ){
trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
}

//Reset the scaling state for future prediction
useScaling = scalingState;

return trained;
}

@@ -203,7 +244,7 @@ bool ANBC::predict_(VectorFloat &inputVector){
if( !trained ) return false;

if( inputVector.size() != numInputDimensions ){
-errorLog << "predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.size() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
+errorLog << "predict_(VectorFloat &inputVector) - The size of the input vector (" << inputVector.getSize() << ") does not match the num features in the model (" << numInputDimensions << std::endl;
return false;
}

Expand All @@ -217,7 +258,7 @@ bool ANBC::predict_(VectorFloat &inputVector){
if( classDistances.size() != numClasses ) classDistances.resize(numClasses,0);

Float classLikelihoodsSum = 0;
-Float minDist = -99e+99;
+Float minDist = 0;
for(UINT k=0; k<numClasses; k++){
classDistances[k] = models[k].predict( inputVector );

Expand All @@ -232,7 +273,7 @@ bool ANBC::predict_(VectorFloat &inputVector){
classLikelihoodsSum += classLikelihoods[k];

//The loglikelihood values are negative so we want the values closest to 0
-if( classDistances[k] > minDist ){
+if( classDistances[k] > minDist || k==0 ){
minDist = classDistances[k];
predictedClassLabel = k;
}
29 changes: 15 additions & 14 deletions GRT/ClassificationModules/ANBC/ANBC.h
@@ -1,19 +1,6 @@
/**
@file
@author Nicholas Gillian <[email protected]>
@version 1.0
@brief This class implements the Adaptive Naive Bayes Classifier algorithm. The Adaptive Naive Bayes Classifier (ANBC) is a naive but powerful classifier that works very well on both basic and more complex recognition problems.
The ANBC algorithm is a supervised learning algorithm that can be used to classify any type of N-dimensional signal. The ANBC algorithm essentially works by fitting an N-dimensional Gaussian distribution to each class (i.e. gesture) during the training phase. New gestures can then be recognized in the prediction phase by finding the gesture that results in the maximum likelihood value (given the new sensor data and each of the Gaussian distributions). The ANBC algorithm also computes rejection thresholds that enable the algorithm to automatically reject sensor values that are not the K gestures the algorithm has been trained to recognized (without being explicitly told during the prediction phase if a gesture is, or is not, being performed).
In addition, the ANBC algorithm enables you to weight the importance of each dimension for each gesture. For instance, imagine that you want to create a recognition system that can recognize a user's left-handed gestures, right-handed gestures, and two-handed gestures. To track the user's movements you use a depth sensor and skeleton-tracking algorithm that can track any user who stands in front of the depth sensor and sends out the x-y-z joint position of the user's two hands (along with the user's other joints) at 30Hz. You use the 3-dimensional joint data for each hand to create a 6-dimensional vector (containing {leftHandX, leftHandY, leftHandZ, rightHandX, rightHandY, rightHandZ}) as input to the ANBC algorithm. The ANBC algorithm enables you to weight each dimension of this vector for each of the 3 types of gestures you want to recognize (i.e. left-handed, right-handed, and two-handed gestures), so for a left-handed gesture you would set the weights for this class to {1,1,1,0,0,0}, for the right-handed gesture you would set the weights for this class to {0,0,0,1,1,1}, and for the two-handed gesture you would set the weights for this class to {1,1,1,1,1,1}. You only need to set these weights values once, before you train the ANBC model, the weights will then automatically be incorporated into the Gaussian models for each gesture (and therefore nothing special needs to be done for the prediction phase). You can set the weights using the setWeights(LabelledClassificationData weightsData) function.
The ANBC algorithm is part of the GRT classification modules.
@remark You can find out more about the ANBC algorithm in <a href="http://www.nickgillian.com/papers/Gillian_ANBC.pdf">ANBC.pdf</a>.
@example ClassificationModulesExamples/ANBCExample/ANBCExample.cpp
*/

/**
@@ -47,6 +34,19 @@ GRT_BEGIN_NAMESPACE
#define MIN_SCALE_VALUE 1.0e-10
#define MAX_SCALE_VALUE 1

/**
@brief This class implements the Adaptive Naive Bayes Classifier algorithm. The Adaptive Naive Bayes Classifier (ANBC) is a naive but powerful classifier that works very well on both basic and more complex recognition problems.
The ANBC algorithm is a supervised learning algorithm that can be used to classify any type of N-dimensional signal. The ANBC algorithm essentially works by fitting an N-dimensional Gaussian distribution to each class (i.e. gesture) during the training phase. New gestures can then be recognized in the prediction phase by finding the gesture that results in the maximum likelihood value (given the new sensor data and each of the Gaussian distributions). The ANBC algorithm also computes rejection thresholds that enable the algorithm to automatically reject sensor values that are not the K gestures the algorithm has been trained to recognize (without being explicitly told during the prediction phase if a gesture is, or is not, being performed).
In addition, the ANBC algorithm enables you to weight the importance of each dimension for each gesture. For instance, imagine that you want to create a recognition system that can recognize a user's left-handed gestures, right-handed gestures, and two-handed gestures. To track the user's movements you use a depth sensor and skeleton-tracking algorithm that can track any user who stands in front of the depth sensor and sends out the x-y-z joint position of the user's two hands (along with the user's other joints) at 30Hz. You use the 3-dimensional joint data for each hand to create a 6-dimensional vector (containing {leftHandX, leftHandY, leftHandZ, rightHandX, rightHandY, rightHandZ}) as input to the ANBC algorithm. The ANBC algorithm enables you to weight each dimension of this vector for each of the 3 types of gestures you want to recognize (i.e. left-handed, right-handed, and two-handed gestures), so for a left-handed gesture you would set the weights for this class to {1,1,1,0,0,0}, for the right-handed gesture you would set the weights for this class to {0,0,0,1,1,1}, and for the two-handed gesture you would set the weights for this class to {1,1,1,1,1,1}. You only need to set these weight values once, before you train the ANBC model; the weights will then automatically be incorporated into the Gaussian models for each gesture (and therefore nothing special needs to be done for the prediction phase). You can set the weights using the setWeights(LabelledClassificationData weightsData) function.
The ANBC algorithm is part of the GRT classification modules.
@remark You can find out more about the ANBC algorithm in <a href="http://www.nickgillian.com/papers/Gillian_ANBC.pdf">ANBC.pdf</a>.
@example ClassificationModulesExamples/ANBCExample/ANBCExample.cpp
*/
class GRT_API ANBC : public Classifier
{
public:
@@ -208,8 +208,9 @@ class GRT_API ANBC : public Classifier
ClassificationData weightsData; //The weights of each feature for each class for training the algorithm
Vector< ANBC_Model > models; //A buffer to hold all the models

private:
static RegisterClassifierModule< ANBC > registerModule;
-static std::string id;
+static const std::string id;
};

GRT_END_NAMESPACE
Expand Down