From 61299e16ec3da2e9534272a3484e7ac7e526cf0f Mon Sep 17 00:00:00 2001 From: Brad King Date: Fri, 24 Jan 2025 14:17:00 -0500 Subject: [PATCH 1/4] ENH: Add script to update libLBFGS from upstream Issue: #3135 --- .../ThirdParty/libLBFGS/UpdateFromUpstream.sh | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 Modules/ThirdParty/libLBFGS/UpdateFromUpstream.sh diff --git a/Modules/ThirdParty/libLBFGS/UpdateFromUpstream.sh b/Modules/ThirdParty/libLBFGS/UpdateFromUpstream.sh new file mode 100755 index 00000000000..d3665ab9aed --- /dev/null +++ b/Modules/ThirdParty/libLBFGS/UpdateFromUpstream.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -e +set -x +shopt -s dotglob + +readonly name="libLBFGS" +readonly ownership="libLBFGS Upstream " +readonly subtree="Modules/ThirdParty/libLBFGS/src/itklbfgs" +readonly repo="https://github.com/chokkan/liblbfgs.git" +readonly tag="v1.10" +readonly paths=" +COPYING +include/*.h +lib/*.c +lib/*.h +" + +extract_source () { + git_archive + pushd "${extractdir}/${name}-reduced" + echo "* -whitespace" >> .gitattributes + popd +} + +. "${BASH_SOURCE%/*}/../../../Utilities/Maintenance/update-third-party.bash" From 9c42aa0d8abcca0ce86b83c3817fa404ece2bf23 Mon Sep 17 00:00:00 2001 From: Brad King Date: Fri, 24 Jan 2025 14:27:17 -0500 Subject: [PATCH 2/4] ENH: Remove libLBFGS sources to make room for fresh import --- Modules/ThirdParty/libLBFGS/include/lbfgs.h | 748 --------- .../ThirdParty/libLBFGS/src/arithmetic_ansi.h | 133 -- .../libLBFGS/src/arithmetic_sse_double.h | 294 ---- .../libLBFGS/src/arithmetic_sse_float.h | 298 ---- Modules/ThirdParty/libLBFGS/src/lbfgs.c | 1371 ----------------- 5 files changed, 2844 deletions(-) delete mode 100644 Modules/ThirdParty/libLBFGS/include/lbfgs.h delete mode 100644 Modules/ThirdParty/libLBFGS/src/arithmetic_ansi.h delete mode 100644 Modules/ThirdParty/libLBFGS/src/arithmetic_sse_double.h delete mode 100644 Modules/ThirdParty/libLBFGS/src/arithmetic_sse_float.h delete mode 100644 Modules/ThirdParty/libLBFGS/src/lbfgs.c diff --git a/Modules/ThirdParty/libLBFGS/include/lbfgs.h b/Modules/ThirdParty/libLBFGS/include/lbfgs.h deleted file mode 100644 index 4a4ed0d2d93..00000000000 --- a/Modules/ThirdParty/libLBFGS/include/lbfgs.h +++ /dev/null @@ -1,748 +0,0 @@ -/* - * C library of Limited memory BFGS (L-BFGS). - * - * Copyright (c) 1990, Jorge Nocedal - * Copyright (c) 2007-2010 Naoaki Okazaki - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* $Id$ */ -#ifdef USE_SSE -#include -#endif - -#ifndef __LBFGS_H__ -#define __LBFGS_H__ - -#ifdef __cplusplus -extern "C" { -#endif/*__cplusplus*/ - -/* - * The default precision of floating point values is 64bit (double). - */ -#ifndef LBFGS_FLOAT -#define LBFGS_FLOAT 64 -#endif/*LBFGS_FLOAT*/ - -/* - * Activate optimization routines for IEEE754 floating point values. - */ -#ifndef LBFGS_IEEE_FLOAT -#define LBFGS_IEEE_FLOAT 1 -#endif/*LBFGS_IEEE_FLOAT*/ - -#if LBFGS_FLOAT == 32 -typedef float lbfgsfloatval_t; - -#elif LBFGS_FLOAT == 64 -typedef double lbfgsfloatval_t; - -#else -#error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only." - -#endif - - -/** - * \addtogroup liblbfgs_api libLBFGS API - * @{ - * - * The libLBFGS API. - */ - -/** - * Return values of lbfgs(). - * - * Roughly speaking, a negative value indicates an error. - */ -enum { - /** L-BFGS reaches convergence. */ - LBFGS_SUCCESS = 0, - LBFGS_CONVERGENCE = 0, - LBFGS_STOP, - /** The initial variables already minimize the objective function. */ - LBFGS_ALREADY_MINIMIZED, - - /** Unknown error. */ - LBFGSERR_UNKNOWNERROR = -1024, - /** Logic error. */ - LBFGSERR_LOGICERROR, - /** Insufficient memory. */ - LBFGSERR_OUTOFMEMORY, - /** The minimization process has been canceled. */ - LBFGSERR_CANCELED, - /** Invalid number of variables specified. */ - LBFGSERR_INVALID_N, - /** Invalid number of variables (for SSE) specified. */ - LBFGSERR_INVALID_N_SSE, - /** The array x must be aligned to 16 (for SSE). */ - LBFGSERR_INVALID_X_SSE, - /** Invalid parameter lbfgs_parameter_t::epsilon specified. */ - LBFGSERR_INVALID_EPSILON, - /** Invalid parameter lbfgs_parameter_t::past specified. */ - LBFGSERR_INVALID_TESTPERIOD, - /** Invalid parameter lbfgs_parameter_t::delta specified. */ - LBFGSERR_INVALID_DELTA, - /** Invalid parameter lbfgs_parameter_t::linesearch specified. */ - LBFGSERR_INVALID_LINESEARCH, - /** Invalid parameter lbfgs_parameter_t::max_step specified. */ - LBFGSERR_INVALID_MINSTEP, - /** Invalid parameter lbfgs_parameter_t::max_step specified. */ - LBFGSERR_INVALID_MAXSTEP, - /** Invalid parameter lbfgs_parameter_t::ftol specified. */ - LBFGSERR_INVALID_FTOL, - /** Invalid parameter lbfgs_parameter_t::wolfe specified. */ - LBFGSERR_INVALID_WOLFE, - /** Invalid parameter lbfgs_parameter_t::gtol specified. */ - LBFGSERR_INVALID_GTOL, - /** Invalid parameter lbfgs_parameter_t::xtol specified. */ - LBFGSERR_INVALID_XTOL, - /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */ - LBFGSERR_INVALID_MAXLINESEARCH, - /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */ - LBFGSERR_INVALID_ORTHANTWISE, - /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */ - LBFGSERR_INVALID_ORTHANTWISE_START, - /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */ - LBFGSERR_INVALID_ORTHANTWISE_END, - /** The line-search step went out of the interval of uncertainty. */ - LBFGSERR_OUTOFINTERVAL, - /** A logic error occurred; alternatively, the interval of uncertainty - became too small. */ - LBFGSERR_INCORRECT_TMINMAX, - /** A rounding error occurred; alternatively, no line-search step - satisfies the sufficient decrease and curvature conditions. */ - LBFGSERR_ROUNDING_ERROR, - /** The line-search step became smaller than lbfgs_parameter_t::min_step. */ - LBFGSERR_MINIMUMSTEP, - /** The line-search step became larger than lbfgs_parameter_t::max_step. */ - LBFGSERR_MAXIMUMSTEP, - /** The line-search routine reaches the maximum number of evaluations. */ - LBFGSERR_MAXIMUMLINESEARCH, - /** The algorithm routine reaches the maximum number of iterations. */ - LBFGSERR_MAXIMUMITERATION, - /** Relative width of the interval of uncertainty is at most - lbfgs_parameter_t::xtol. */ - LBFGSERR_WIDTHTOOSMALL, - /** A logic error (negative line-search step) occurred. */ - LBFGSERR_INVALIDPARAMETERS, - /** The current search direction increases the objective function value. */ - LBFGSERR_INCREASEGRADIENT, -}; - -/** - * Line search algorithms. - */ -enum { - /** The default algorithm (MoreThuente method). */ - LBFGS_LINESEARCH_DEFAULT = 0, - /** MoreThuente method proposd by More and Thuente. */ - LBFGS_LINESEARCH_MORETHUENTE = 0, - /** - * Backtracking method with the Armijo condition. - * The backtracking method finds the step length such that it satisfies - * the sufficient decrease (Armijo) condition, - * - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d, - * - * where x is the current point, d is the current search direction, and - * a is the step length. - */ - LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, - /** The backtracking method with the defualt (regular Wolfe) condition. */ - LBFGS_LINESEARCH_BACKTRACKING = 2, - /** - * Backtracking method with regular Wolfe condition. - * The backtracking method finds the step length such that it satisfies - * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) - * and the curvature condition, - * - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d, - * - * where x is the current point, d is the current search direction, and - * a is the step length. - */ - LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, - /** - * Backtracking method with strong Wolfe condition. - * The backtracking method finds the step length such that it satisfies - * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) - * and the following condition, - * - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|, - * - * where x is the current point, d is the current search direction, and - * a is the step length. - */ - LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, -}; - -/** - * L-BFGS optimization parameters. - * Call lbfgs_parameter_init() function to initialize parameters to the - * default values. - */ -typedef struct { - /** - * The number of corrections to approximate the inverse hessian matrix. - * The L-BFGS routine stores the computation results of previous \ref m - * iterations to approximate the inverse hessian matrix of the current - * iteration. This parameter controls the size of the limited memories - * (corrections). The default value is \c 6. Values less than \c 3 are - * not recommended. Large values will result in excessive computing time. - */ - int m; - - /** - * Epsilon for convergence test. - * This parameter determines the accuracy with which the solution is to - * be found. A minimization terminates when - * ||g|| < \ref epsilon * max(1, ||x||), - * where ||.|| denotes the Euclidean (L2) norm. The default value is - * \c 1e-5. - */ - lbfgsfloatval_t epsilon; - - /** - * Distance for delta-based convergence test. - * This parameter determines the distance, in iterations, to compute - * the rate of decrease of the objective function. If the value of this - * parameter is zero, the library does not perform the delta-based - * convergence test. The default value is \c 0. - */ - int past; - - /** - * Delta for convergence test. - * This parameter determines the minimum rate of decrease of the - * objective function. The library stops iterations when the - * following condition is met: - * (f' - f) / f < \ref delta, - * where f' is the objective value of \ref past iterations ago, and f is - * the objective value of the current iteration. - * The default value is \c 0. - */ - lbfgsfloatval_t delta; - - /** - * The maximum number of iterations. - * The lbfgs() function terminates an optimization process with - * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count - * exceedes this parameter. Setting this parameter to zero continues an - * optimization process until a convergence or error. The default value - * is \c 0. - */ - int max_iterations; - - /** - * The line search algorithm. - * This parameter specifies a line search algorithm to be used by the - * L-BFGS routine. - */ - int linesearch; - - /** - * The maximum number of trials for the line search. - * This parameter controls the number of function and gradients evaluations - * per iteration for the line search routine. The default value is \c 20. - */ - int max_linesearch; - - /** - * The minimum step of the line search routine. - * The default value is \c 1e-20. This value need not be modified unless - * the exponents are too large for the machine being used, or unless the - * problem is extremely badly scaled (in which case the exponents should - * be increased). - */ - lbfgsfloatval_t min_step; - - /** - * The maximum step of the line search. - * The default value is \c 1e+20. This value need not be modified unless - * the exponents are too large for the machine being used, or unless the - * problem is extremely badly scaled (in which case the exponents should - * be increased). - */ - lbfgsfloatval_t max_step; - - /** - * A parameter to control the accuracy of the line search routine. - * The default value is \c 1e-4. This parameter should be greater - * than zero and smaller than \c 0.5. - */ - lbfgsfloatval_t ftol; - - /** - * A coefficient for the Wolfe condition. - * This parameter is valid only when the backtracking line-search - * algorithm is used with the Wolfe condition, - * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or - * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE . - * The default value is \c 0.9. This parameter should be greater - * the \ref ftol parameter and smaller than \c 1.0. - */ - lbfgsfloatval_t wolfe; - - /** - * A parameter to control the accuracy of the line search routine. - * The default value is \c 0.9. If the function and gradient - * evaluations are inexpensive with respect to the cost of the - * iteration (which is sometimes the case when solving very large - * problems) it may be advantageous to set this parameter to a small - * value. A typical small value is \c 0.1. This parameter shuold be - * greater than the \ref ftol parameter (\c 1e-4) and smaller than - * \c 1.0. - */ - lbfgsfloatval_t gtol; - - /** - * The machine precision for floating-point values. - * This parameter must be a positive value set by a client program to - * estimate the machine precision. The line search routine will terminate - * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width - * of the interval of uncertainty is less than this parameter. - */ - lbfgsfloatval_t xtol; - - /** - * Coeefficient for the L1 norm of variables. - * This parameter should be set to zero for standard minimization - * problems. Setting this parameter to a positive value activates - * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which - * minimizes the objective function F(x) combined with the L1 norm |x| - * of the variables, {F(x) + C |x|}. This parameter is the coeefficient - * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at - * zero, the library modifies function and gradient evaluations from - * a client program suitably; a client program thus have only to return - * the function value F(x) and gradients G(x) as usual. The default value - * is zero. - */ - lbfgsfloatval_t orthantwise_c; - - /** - * Start index for computing L1 norm of the variables. - * This parameter is valid only for OWL-QN method - * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N) - * specifies the index number from which the library computes the - * L1 norm of the variables x, - * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| . - * In other words, variables x_1, ..., x_{b-1} are not used for - * computing the L1 norm. Setting b (0 < b < N), one can protect - * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic - * regression) from being regularized. The default value is zero. - */ - int orthantwise_start; - - /** - * End index for computing L1 norm of the variables. - * This parameter is valid only for OWL-QN method - * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N) - * specifies the index number at which the library stops computing the - * L1 norm of the variables x, - */ - int orthantwise_end; -} lbfgs_parameter_t; - - -/** - * Callback interface to provide objective function and gradient evaluations. - * - * The lbfgs() function call this function to obtain the values of objective - * function and its gradients when needed. A client program must implement - * this function to evaluate the values of the objective function and its - * gradients, given current values of variables. - * - * @param instance The user data sent for lbfgs() function by the client. - * @param x The current values of variables. - * @param g The gradient vector. The callback function must compute - * the gradient values for the current variables. - * @param n The number of variables. - * @param step The current step of the line search routine. - * @retval lbfgsfloatval_t The value of the objective function for the current - * variables. - */ -typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( - void *instance, - const lbfgsfloatval_t *x, - lbfgsfloatval_t *g, - const int n, - const lbfgsfloatval_t step - ); - -/** - * Callback interface to receive the progress of the optimization process. - * - * The lbfgs() function call this function for each iteration. Implementing - * this function, a client program can store or display the current progress - * of the optimization process. - * - * @param instance The user data sent for lbfgs() function by the client. - * @param x The current values of variables. - * @param g The current gradient values of variables. - * @param fx The current value of the objective function. - * @param xnorm The Euclidean norm of the variables. - * @param gnorm The Euclidean norm of the gradients. - * @param step The line-search step used for this iteration. - * @param n The number of variables. - * @param k The iteration count. - * @param ls The number of evaluations called for this iteration. - * @retval int Zero to continue the optimization process. Returning a - * non-zero value will cancel the optimization process. - */ -typedef int (*lbfgs_progress_t)( - void *instance, - const lbfgsfloatval_t *x, - const lbfgsfloatval_t *g, - const lbfgsfloatval_t fx, - const lbfgsfloatval_t xnorm, - const lbfgsfloatval_t gnorm, - const lbfgsfloatval_t step, - int n, - int k, - int ls - ); - -/* -A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation -callback) and pass the pointer to the callback function to lbfgs() arguments. -Similarly, a user can implement a function compatible with ::lbfgs_progress_t -(progress callback) to obtain the current progress (e.g., variables, function -value, ||G||, etc) and to cancel the iteration process if necessary. -Implementation of a progress callback is optional: a user can pass \c NULL if -progress notification is not necessary. - -In addition, a user must preserve two requirements: - - The number of variables must be multiples of 16 (this is not 4). - - The memory block of variable array ::x must be aligned to 16. - -This algorithm terminates an optimization -when: - - ||G|| < \epsilon \cdot \max(1, ||x||) . - -In this formula, ||.|| denotes the Euclidean norm. -*/ - -/** - * Start a L-BFGS optimization. - * - * @param n The number of variables. - * @param x The array of variables. A client program can set - * default values for the optimization and receive the - * optimization result through this array. This array - * must be allocated by ::lbfgs_malloc function - * for libLBFGS built with SSE/SSE2 optimization routine - * enabled. The library built without SSE/SSE2 - * optimization does not have such a requirement. - * @param ptr_fx The pointer to the variable that receives the final - * value of the objective function for the variables. - * This argument can be set to \c NULL if the final - * value of the objective function is unnecessary. - * @param proc_evaluate The callback function to provide function and - * gradient evaluations given a current values of - * variables. A client program must implement a - * callback function compatible with \ref - * lbfgs_evaluate_t and pass the pointer to the - * callback function. - * @param proc_progress The callback function to receive the progress - * (the number of iterations, the current value of - * the objective function) of the minimization - * process. This argument can be set to \c NULL if - * a progress report is unnecessary. - * @param instance A user data for the client program. The callback - * functions will receive the value of this argument. - * @param param The pointer to a structure representing parameters for - * L-BFGS optimization. A client program can set this - * parameter to \c NULL to use the default parameters. - * Call lbfgs_parameter_init() function to fill a - * structure with the default values. - * @retval int The status code. This function returns zero if the - * minimization process terminates without an error. A - * non-zero value indicates an error. - */ -int lbfgs( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *ptr_fx, - lbfgs_evaluate_t proc_evaluate, - lbfgs_progress_t proc_progress, - void *instance, - lbfgs_parameter_t *param - ); - -/** - * Initialize L-BFGS parameters to the default values. - * - * Call this function to fill a parameter structure with the default values - * and overwrite parameter values if necessary. - * - * @param param The pointer to the parameter structure. - */ -void lbfgs_parameter_init(lbfgs_parameter_t *param); - -/** - * Allocate an array for variables. - * - * This function allocates an array of variables for the convenience of - * ::lbfgs function; the function has a requreiemt for a variable array - * when libLBFGS is built with SSE/SSE2 optimization routines. A user does - * not have to use this function for libLBFGS built without SSE/SSE2 - * optimization. - * - * @param n The number of variables. - */ -lbfgsfloatval_t* lbfgs_malloc(int n); - -/** - * Free an array of variables. - * - * @param x The array of variables allocated by ::lbfgs_malloc - * function. - */ -void lbfgs_free(lbfgsfloatval_t *x); - -/** @} */ - -#ifdef __cplusplus -} -#endif/*__cplusplus*/ - - - -/** -@mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) - -@section intro Introduction - -This library is a C port of the implementation of Limited-memory -Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. -The original FORTRAN source code is available at: -http://www.ece.northwestern.edu/~nocedal/lbfgs.html - -The L-BFGS method solves the unconstrainted minimization problem, - -
-    minimize F(x), x = (x1, x2, ..., xN),
-
- -only if the objective function F(x) and its gradient G(x) are computable. The -well-known Newton's method requires computation of the inverse of the hessian -matrix of the objective function. However, the computational cost for the -inverse hessian matrix is expensive especially when the objective function -takes a large number of variables. The L-BFGS method iteratively finds a -minimizer by approximating the inverse hessian matrix by information from last -m iterations. This innovation saves the memory storage and computational time -drastically for large-scaled problems. - -Among the various ports of L-BFGS, this library provides several features: -- Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton - (OWL-QN) method): - In addition to standard minimization problems, the library can minimize - a function F(x) combined with L1-norm |x| of the variables, - {F(x) + C |x|}, where C is a constant scalar parameter. This feature is - useful for estimating parameters of sparse log-linear models (e.g., - logistic regression and maximum entropy) with L1-regularization (or - Laplacian prior). -- Clean C code: - Unlike C codes generated automatically by f2c (Fortran 77 into C converter), - this port includes changes based on my interpretations, improvements, - optimizations, and clean-ups so that the ported code would be well-suited - for a C code. In addition to comments inherited from the original code, - a number of comments were added through my interpretations. -- Callback interface: - The library receives function and gradient values via a callback interface. - The library also notifies the progress of the optimization by invoking a - callback function. In the original implementation, a user had to set - function and gradient values every time the function returns for obtaining - updated values. -- Thread safe: - The library is thread-safe, which is the secondary gain from the callback - interface. -- Cross platform. The source code can be compiled on Microsoft Visual - Studio 2010, GNU C Compiler (gcc), etc. -- Configurable precision: A user can choose single-precision (float) - or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro. -- SSE/SSE2 optimization: - This library includes SSE/SSE2 optimization (written in compiler intrinsics) - for vector arithmetic operations on Intel/AMD processors. The library uses - SSE for float values and SSE2 for double values. The SSE/SSE2 optimization - routine is disabled by default. - -This library is used by: -- CRFsuite: A fast implementation of Conditional Random Fields (CRFs) -- Classias: A collection of machine-learning algorithms for classification -- mlegp: an R package for maximum likelihood estimates for Gaussian processes -- imaging2: the imaging2 class library -- Algorithm::LBFGS - Perl extension for L-BFGS -- YAP-LBFGS (an interface to call libLBFGS from YAP Prolog) - -@section download Download - -- Source code -- GitHub repository - -libLBFGS is distributed under the term of the -MIT license. - -@section changelog History -- Version 1.10 (2010-12-22): - - Fixed compiling errors on Mac OS X; this patch was kindly submitted by - Nic Schraudolph. - - Reduced compiling warnings on Mac OS X; this patch was kindly submitted - by Tamas Nepusz. - - Replaced memalign() with posix_memalign(). - - Updated solution and project files for Microsoft Visual Studio 2010. -- Version 1.9 (2010-01-29): - - Fixed a mistake in checking the validity of the parameters "ftol" and - "wolfe"; this was discovered by Kevin S. Van Horn. -- Version 1.8 (2009-07-13): - - Accepted the patch submitted by Takashi Imamichi; - the backtracking method now has three criteria for choosing the step - length: - - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo) - condition only - - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition - (sufficient decrease condition + curvature condition) - - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition - - Updated the documentation to explain the above three criteria. -- Version 1.7 (2009-02-28): - - Improved OWL-QN routines for stability. - - Removed the support of OWL-QN method in MoreThuente algorithm because - it accidentally fails in early stages of iterations for some objectives. - Because of this change, the OW-LQN method must be used with the - backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING), or the - library returns ::LBFGSERR_INVALID_LINESEARCH. - - Renamed line search algorithms as follows: - - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition. - - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition. - - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition. - - Source code clean-up. -- Version 1.6 (2008-11-02): - - Improved line-search algorithm with strong Wolfe condition, which was - contributed by Takashi Imamichi. This routine is now default for - ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm - with regular Wolfe condition is still available as - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE. - - Configurable stop index for L1-norm computation. A member variable - ::lbfgs_parameter_t::orthantwise_end was added to specify the index - number at which the library stops computing the L1 norm of the - variables. This is useful to prevent some variables from being - regularized by the OW-LQN method. - - A sample program written in C++ (sample/sample.cpp). -- Version 1.5 (2008-07-10): - - Configurable starting index for L1-norm computation. A member variable - ::lbfgs_parameter_t::orthantwise_start was added to specify the index - number from which the library computes the L1 norm of the variables. - This is useful to prevent some variables from being regularized by the - OWL-QN method. - - Fixed a zero-division error when the initial variables have already - been a minimizer (reported by Takashi Imamichi). In this case, the - library returns ::LBFGS_ALREADY_MINIMIZED status code. - - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants, - LBFGSFALSE and LBFGSTRUE. - - Fixed a compile error in an implicit down-cast. -- Version 1.4 (2008-04-25): - - Configurable line search algorithms. A member variable - ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente - method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm - (::LBFGS_LINESEARCH_BACKTRACKING). - - Fixed a bug: the previous version did not compute psuedo-gradients - properly in the line search routines for OWL-QN. This bug might quit - an iteration process too early when the OWL-QN routine was activated - (0 < ::lbfgs_parameter_t::orthantwise_c). - - Configure script for POSIX environments. - - SSE/SSE2 optimizations with GCC. - - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines - transparently. It is uncessary to use these functions for libLBFGS built - without SSE/SSE2 routines; you can still use any memory allocators if - SSE/SSE2 routines are disabled in libLBFGS. -- Version 1.3 (2007-12-16): - - An API change. An argument was added to lbfgs() function to receive the - final value of the objective function. This argument can be set to - \c NULL if the final value is unnecessary. - - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi). - - Added build scripts for Microsoft Visual Studio 2005 and GCC. - - Added README file. -- Version 1.2 (2007-12-13): - - Fixed a serious bug in orthant-wise L-BFGS. - An important variable was used without initialization. -- Version 1.1 (2007-12-01): - - Implemented orthant-wise L-BFGS. - - Implemented lbfgs_parameter_init() function. - - Fixed several bugs. - - API documentation. -- Version 1.0 (2007-09-20): - - Initial release. - -@section api Documentation - -- @ref liblbfgs_api "libLBFGS API" - -@section sample Sample code - -@include sample.c - -@section ack Acknowledgements - -The L-BFGS algorithm is described in: - - Jorge Nocedal. - Updating Quasi-Newton Matrices with Limited Storage. - Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. - - Dong C. Liu and Jorge Nocedal. - On the limited memory BFGS method for large scale optimization. - Mathematical Programming B, Vol. 45, No. 3, pp. 503-528, 1989. - -The line search algorithms used in this implementation are described in: - - John E. Dennis and Robert B. Schnabel. - Numerical Methods for Unconstrained Optimization and Nonlinear - Equations, Englewood Cliffs, 1983. - - Jorge J. More and David J. Thuente. - Line search algorithm with guaranteed sufficient decrease. - ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, - pp. 286-307, 1994. - -This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) -method presented in: - - Galen Andrew and Jianfeng Gao. - Scalable training of L1-regularized log-linear models. - In Proceedings of the 24th International Conference on Machine - Learning (ICML 2007), pp. 33-40, 2007. - -Special thanks go to: - - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about - OWL-QN - - Takashi Imamichi for the useful enhancements of the backtracking method - - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes - -Finally I would like to thank the original author, Jorge Nocedal, who has been -distributing the effieicnt and explanatory implementation in an open source -licence. - -@section reference Reference - -- L-BFGS by Jorge Nocedal. -- Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives by Galen Andrew. -- C port (via f2c) by Taku Kudo. -- C#/C++/Delphi/VisualBasic6 port in ALGLIB. -- Computational Crystallography Toolbox includes - scitbx::lbfgs. -*/ - -#endif/*__LBFGS_H__*/ diff --git a/Modules/ThirdParty/libLBFGS/src/arithmetic_ansi.h b/Modules/ThirdParty/libLBFGS/src/arithmetic_ansi.h deleted file mode 100644 index fa390da37c7..00000000000 --- a/Modules/ThirdParty/libLBFGS/src/arithmetic_ansi.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * ANSI C implementation of vector operations. - * - * Copyright (c) 2007-2010 Naoaki Okazaki - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* $Id$ */ - -#include -#include - -#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT -#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) -#else -#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) -#endif/*LBFGS_IEEE_FLOAT*/ - -inline static void* vecalloc(size_t size) -{ - void *memblock = malloc(size); - if (memblock) { - memset(memblock, 0, size); - } - return memblock; -} - -inline static void vecfree(void *memblock) -{ - free(memblock); -} - -inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - x[i] = c; - } -} - -inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - y[i] = x[i]; - } -} - -inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - y[i] = -x[i]; - } -} - -inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - y[i] += c * x[i]; - } -} - -inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - z[i] = x[i] - y[i]; - } -} - -inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - y[i] *= c; - } -} - -inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) -{ - int i; - - for (i = 0;i < n;++i) { - y[i] *= x[i]; - } -} - -inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) -{ - int i; - *s = 0.; - for (i = 0;i < n;++i) { - *s += x[i] * y[i]; - } -} - -inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) -{ - vecdot(s, x, x, n); - *s = (lbfgsfloatval_t)sqrt(*s); -} - -inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) -{ - vec2norm(s, x, n); - *s = (lbfgsfloatval_t)(1.0 / *s); -} diff --git a/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_double.h b/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_double.h deleted file mode 100644 index 83405eeb1f1..00000000000 --- a/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_double.h +++ /dev/null @@ -1,294 +0,0 @@ -/* - * SSE2 implementation of vector oprations (64bit double). - * - * Copyright (c) 2007-2010 Naoaki Okazaki - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* $Id$ */ - -#include -#ifndef __APPLE__ -#include -#endif -#include - -#if 1400 <= _MSC_VER -#include -#endif/*1400 <= _MSC_VER*/ - -#if HAVE_EMMINTRIN_H -#include -#endif/*HAVE_EMMINTRIN_H*/ - -inline static void* vecalloc(size_t size) -{ -#if defined(_MSC_VER) - void *memblock = _aligned_malloc(size, 16); -#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ - void *memblock = malloc(size); -#else - void *memblock = NULL, *p = NULL; - if (posix_memalign(&p, 16, size) == 0) { - memblock = p; - } -#endif - if (memblock != NULL) { - memset(memblock, 0, size); - } - return memblock; -} - -inline static void vecfree(void *memblock) -{ -#ifdef _MSC_VER - _aligned_free(memblock); -#else - free(memblock); -#endif -} - -#define fsigndiff(x, y) \ - ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002) - -#define vecset(x, c, n) \ -{ \ - int i; \ - __m128d XMM0 = _mm_set1_pd(c); \ - for (i = 0;i < (n);i += 8) { \ - _mm_store_pd((x)+i , XMM0); \ - _mm_store_pd((x)+i+2, XMM0); \ - _mm_store_pd((x)+i+4, XMM0); \ - _mm_store_pd((x)+i+6, XMM0); \ - } \ -} - -#define veccpy(y, x, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 8) { \ - __m128d XMM0 = _mm_load_pd((x)+i ); \ - __m128d XMM1 = _mm_load_pd((x)+i+2); \ - __m128d XMM2 = _mm_load_pd((x)+i+4); \ - __m128d XMM3 = _mm_load_pd((x)+i+6); \ - _mm_store_pd((y)+i , XMM0); \ - _mm_store_pd((y)+i+2, XMM1); \ - _mm_store_pd((y)+i+4, XMM2); \ - _mm_store_pd((y)+i+6, XMM3); \ - } \ -} - -#define vecncpy(y, x, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 8) { \ - __m128d XMM0 = _mm_setzero_pd(); \ - __m128d XMM1 = _mm_setzero_pd(); \ - __m128d XMM2 = _mm_setzero_pd(); \ - __m128d XMM3 = _mm_setzero_pd(); \ - __m128d XMM4 = _mm_load_pd((x)+i ); \ - __m128d XMM5 = _mm_load_pd((x)+i+2); \ - __m128d XMM6 = _mm_load_pd((x)+i+4); \ - __m128d XMM7 = _mm_load_pd((x)+i+6); \ - XMM0 = _mm_sub_pd(XMM0, XMM4); \ - XMM1 = _mm_sub_pd(XMM1, XMM5); \ - XMM2 = _mm_sub_pd(XMM2, XMM6); \ - XMM3 = _mm_sub_pd(XMM3, XMM7); \ - _mm_store_pd((y)+i , XMM0); \ - _mm_store_pd((y)+i+2, XMM1); \ - _mm_store_pd((y)+i+4, XMM2); \ - _mm_store_pd((y)+i+6, XMM3); \ - } \ -} - -#define vecadd(y, x, c, n) \ -{ \ - int i; \ - __m128d XMM7 = _mm_set1_pd(c); \ - for (i = 0;i < (n);i += 4) { \ - __m128d XMM0 = _mm_load_pd((x)+i ); \ - __m128d XMM1 = _mm_load_pd((x)+i+2); \ - __m128d XMM2 = _mm_load_pd((y)+i ); \ - __m128d XMM3 = _mm_load_pd((y)+i+2); \ - XMM0 = _mm_mul_pd(XMM0, XMM7); \ - XMM1 = _mm_mul_pd(XMM1, XMM7); \ - XMM2 = _mm_add_pd(XMM2, XMM0); \ - XMM3 = _mm_add_pd(XMM3, XMM1); \ - _mm_store_pd((y)+i , XMM2); \ - _mm_store_pd((y)+i+2, XMM3); \ - } \ -} - -#define vecdiff(z, x, y, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 8) { \ - __m128d XMM0 = _mm_load_pd((x)+i ); \ - __m128d XMM1 = _mm_load_pd((x)+i+2); \ - __m128d XMM2 = _mm_load_pd((x)+i+4); \ - __m128d XMM3 = _mm_load_pd((x)+i+6); \ - __m128d XMM4 = _mm_load_pd((y)+i ); \ - __m128d XMM5 = _mm_load_pd((y)+i+2); \ - __m128d XMM6 = _mm_load_pd((y)+i+4); \ - __m128d XMM7 = _mm_load_pd((y)+i+6); \ - XMM0 = _mm_sub_pd(XMM0, XMM4); \ - XMM1 = _mm_sub_pd(XMM1, XMM5); \ - XMM2 = _mm_sub_pd(XMM2, XMM6); \ - XMM3 = _mm_sub_pd(XMM3, XMM7); \ - _mm_store_pd((z)+i , XMM0); \ - _mm_store_pd((z)+i+2, XMM1); \ - _mm_store_pd((z)+i+4, XMM2); \ - _mm_store_pd((z)+i+6, XMM3); \ - } \ -} - -#define vecscale(y, c, n) \ -{ \ - int i; \ - __m128d XMM7 = _mm_set1_pd(c); \ - for (i = 0;i < (n);i += 4) { \ - __m128d XMM0 = _mm_load_pd((y)+i ); \ - __m128d XMM1 = _mm_load_pd((y)+i+2); \ - XMM0 = _mm_mul_pd(XMM0, XMM7); \ - XMM1 = _mm_mul_pd(XMM1, XMM7); \ - _mm_store_pd((y)+i , XMM0); \ - _mm_store_pd((y)+i+2, XMM1); \ - } \ -} - -#define vecmul(y, x, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 8) { \ - __m128d XMM0 = _mm_load_pd((x)+i ); \ - __m128d XMM1 = _mm_load_pd((x)+i+2); \ - __m128d XMM2 = _mm_load_pd((x)+i+4); \ - __m128d XMM3 = _mm_load_pd((x)+i+6); \ - __m128d XMM4 = _mm_load_pd((y)+i ); \ - __m128d XMM5 = _mm_load_pd((y)+i+2); \ - __m128d XMM6 = _mm_load_pd((y)+i+4); \ - __m128d XMM7 = _mm_load_pd((y)+i+6); \ - XMM4 = _mm_mul_pd(XMM4, XMM0); \ - XMM5 = _mm_mul_pd(XMM5, XMM1); \ - XMM6 = _mm_mul_pd(XMM6, XMM2); \ - XMM7 = _mm_mul_pd(XMM7, XMM3); \ - _mm_store_pd((y)+i , XMM4); \ - _mm_store_pd((y)+i+2, XMM5); \ - _mm_store_pd((y)+i+4, XMM6); \ - _mm_store_pd((y)+i+6, XMM7); \ - } \ -} - - - -#if 3 <= __SSE__ || defined(__SSE3__) -/* - Horizontal add with haddps SSE3 instruction. The work register (rw) - is unused. - */ -#define __horizontal_sum(r, rw) \ - r = _mm_hadd_ps(r, r); \ - r = _mm_hadd_ps(r, r); - -#else -/* - Horizontal add with SSE instruction. The work register (rw) is used. - */ -#define __horizontal_sum(r, rw) \ - rw = r; \ - r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ - r = _mm_add_ps(r, rw); \ - rw = r; \ - r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ - r = _mm_add_ps(r, rw); - -#endif - -#define vecdot(s, x, y, n) \ -{ \ - int i; \ - __m128d XMM0 = _mm_setzero_pd(); \ - __m128d XMM1 = _mm_setzero_pd(); \ - __m128d XMM2, XMM3, XMM4, XMM5; \ - for (i = 0;i < (n);i += 4) { \ - XMM2 = _mm_load_pd((x)+i ); \ - XMM3 = _mm_load_pd((x)+i+2); \ - XMM4 = _mm_load_pd((y)+i ); \ - XMM5 = _mm_load_pd((y)+i+2); \ - XMM2 = _mm_mul_pd(XMM2, XMM4); \ - XMM3 = _mm_mul_pd(XMM3, XMM5); \ - XMM0 = _mm_add_pd(XMM0, XMM2); \ - XMM1 = _mm_add_pd(XMM1, XMM3); \ - } \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - _mm_store_sd((s), XMM0); \ -} - -#define vec2norm(s, x, n) \ -{ \ - int i; \ - __m128d XMM0 = _mm_setzero_pd(); \ - __m128d XMM1 = _mm_setzero_pd(); \ - __m128d XMM2, XMM3, XMM4, XMM5; \ - for (i = 0;i < (n);i += 4) { \ - XMM2 = _mm_load_pd((x)+i ); \ - XMM3 = _mm_load_pd((x)+i+2); \ - XMM4 = XMM2; \ - XMM5 = XMM3; \ - XMM2 = _mm_mul_pd(XMM2, XMM4); \ - XMM3 = _mm_mul_pd(XMM3, XMM5); \ - XMM0 = _mm_add_pd(XMM0, XMM2); \ - XMM1 = _mm_add_pd(XMM1, XMM3); \ - } \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - XMM0 = _mm_sqrt_pd(XMM0); \ - _mm_store_sd((s), XMM0); \ -} - - -#define vec2norminv(s, x, n) \ -{ \ - int i; \ - __m128d XMM0 = _mm_setzero_pd(); \ - __m128d XMM1 = _mm_setzero_pd(); \ - __m128d XMM2, XMM3, XMM4, XMM5; \ - for (i = 0;i < (n);i += 4) { \ - XMM2 = _mm_load_pd((x)+i ); \ - XMM3 = _mm_load_pd((x)+i+2); \ - XMM4 = XMM2; \ - XMM5 = XMM3; \ - XMM2 = _mm_mul_pd(XMM2, XMM4); \ - XMM3 = _mm_mul_pd(XMM3, XMM5); \ - XMM0 = _mm_add_pd(XMM0, XMM2); \ - XMM1 = _mm_add_pd(XMM1, XMM3); \ - } \ - XMM2 = _mm_set1_pd(1.0); \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ - XMM0 = _mm_add_pd(XMM0, XMM1); \ - XMM0 = _mm_sqrt_pd(XMM0); \ - XMM2 = _mm_div_pd(XMM2, XMM0); \ - _mm_store_sd((s), XMM2); \ -} diff --git a/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_float.h b/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_float.h deleted file mode 100644 index 835b8aa8a19..00000000000 --- a/Modules/ThirdParty/libLBFGS/src/arithmetic_sse_float.h +++ /dev/null @@ -1,298 +0,0 @@ -/* - * SSE/SSE3 implementation of vector oprations (32bit float). - * - * Copyright (c) 2007-2010 Naoaki Okazaki - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* $Id$ */ - -#include -#ifndef __APPLE__ -#include -#endif -#include - -#if 1400 <= _MSC_VER -#include -#endif/*_MSC_VER*/ - -#if HAVE_XMMINTRIN_H -#include -#endif/*HAVE_XMMINTRIN_H*/ - -#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT -#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) -#else -#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) -#endif/*LBFGS_IEEE_FLOAT*/ - -inline static void* vecalloc(size_t size) -{ -#if defined(_MSC_VER) - void *memblock = _aligned_malloc(size, 16); -#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ - void *memblock = malloc(size); -#else - void *memblock = NULL, *p = NULL; - if (posix_memalign(&p, 16, size) == 0) { - memblock = p; - } -#endif - if (memblock != NULL) { - memset(memblock, 0, size); - } - return memblock; -} - -inline static void vecfree(void *memblock) -{ - _aligned_free(memblock); -} - -#define vecset(x, c, n) \ -{ \ - int i; \ - __m128 XMM0 = _mm_set_ps1(c); \ - for (i = 0;i < (n);i += 16) { \ - _mm_store_ps((x)+i , XMM0); \ - _mm_store_ps((x)+i+ 4, XMM0); \ - _mm_store_ps((x)+i+ 8, XMM0); \ - _mm_store_ps((x)+i+12, XMM0); \ - } \ -} - -#define veccpy(y, x, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 16) { \ - __m128 XMM0 = _mm_load_ps((x)+i ); \ - __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ - __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ - __m128 XMM3 = _mm_load_ps((x)+i+12); \ - _mm_store_ps((y)+i , XMM0); \ - _mm_store_ps((y)+i+ 4, XMM1); \ - _mm_store_ps((y)+i+ 8, XMM2); \ - _mm_store_ps((y)+i+12, XMM3); \ - } \ -} - -#define vecncpy(y, x, n) \ -{ \ - int i; \ - const uint32_t mask = 0x80000000; \ - __m128 XMM4 = _mm_load_ps1((float*)&mask); \ - for (i = 0;i < (n);i += 16) { \ - __m128 XMM0 = _mm_load_ps((x)+i ); \ - __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ - __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ - __m128 XMM3 = _mm_load_ps((x)+i+12); \ - XMM0 = _mm_xor_ps(XMM0, XMM4); \ - XMM1 = _mm_xor_ps(XMM1, XMM4); \ - XMM2 = _mm_xor_ps(XMM2, XMM4); \ - XMM3 = _mm_xor_ps(XMM3, XMM4); \ - _mm_store_ps((y)+i , XMM0); \ - _mm_store_ps((y)+i+ 4, XMM1); \ - _mm_store_ps((y)+i+ 8, XMM2); \ - _mm_store_ps((y)+i+12, XMM3); \ - } \ -} - -#define vecadd(y, x, c, n) \ -{ \ - int i; \ - __m128 XMM7 = _mm_set_ps1(c); \ - for (i = 0;i < (n);i += 8) { \ - __m128 XMM0 = _mm_load_ps((x)+i ); \ - __m128 XMM1 = _mm_load_ps((x)+i+4); \ - __m128 XMM2 = _mm_load_ps((y)+i ); \ - __m128 XMM3 = _mm_load_ps((y)+i+4); \ - XMM0 = _mm_mul_ps(XMM0, XMM7); \ - XMM1 = _mm_mul_ps(XMM1, XMM7); \ - XMM2 = _mm_add_ps(XMM2, XMM0); \ - XMM3 = _mm_add_ps(XMM3, XMM1); \ - _mm_store_ps((y)+i , XMM2); \ - _mm_store_ps((y)+i+4, XMM3); \ - } \ -} - -#define vecdiff(z, x, y, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 16) { \ - __m128 XMM0 = _mm_load_ps((x)+i ); \ - __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ - __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ - __m128 XMM3 = _mm_load_ps((x)+i+12); \ - __m128 XMM4 = _mm_load_ps((y)+i ); \ - __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ - __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ - __m128 XMM7 = _mm_load_ps((y)+i+12); \ - XMM0 = _mm_sub_ps(XMM0, XMM4); \ - XMM1 = _mm_sub_ps(XMM1, XMM5); \ - XMM2 = _mm_sub_ps(XMM2, XMM6); \ - XMM3 = _mm_sub_ps(XMM3, XMM7); \ - _mm_store_ps((z)+i , XMM0); \ - _mm_store_ps((z)+i+ 4, XMM1); \ - _mm_store_ps((z)+i+ 8, XMM2); \ - _mm_store_ps((z)+i+12, XMM3); \ - } \ -} - -#define vecscale(y, c, n) \ -{ \ - int i; \ - __m128 XMM7 = _mm_set_ps1(c); \ - for (i = 0;i < (n);i += 8) { \ - __m128 XMM0 = _mm_load_ps((y)+i ); \ - __m128 XMM1 = _mm_load_ps((y)+i+4); \ - XMM0 = _mm_mul_ps(XMM0, XMM7); \ - XMM1 = _mm_mul_ps(XMM1, XMM7); \ - _mm_store_ps((y)+i , XMM0); \ - _mm_store_ps((y)+i+4, XMM1); \ - } \ -} - -#define vecmul(y, x, n) \ -{ \ - int i; \ - for (i = 0;i < (n);i += 16) { \ - __m128 XMM0 = _mm_load_ps((x)+i ); \ - __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ - __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ - __m128 XMM3 = _mm_load_ps((x)+i+12); \ - __m128 XMM4 = _mm_load_ps((y)+i ); \ - __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ - __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ - __m128 XMM7 = _mm_load_ps((y)+i+12); \ - XMM4 = _mm_mul_ps(XMM4, XMM0); \ - XMM5 = _mm_mul_ps(XMM5, XMM1); \ - XMM6 = _mm_mul_ps(XMM6, XMM2); \ - XMM7 = _mm_mul_ps(XMM7, XMM3); \ - _mm_store_ps((y)+i , XMM4); \ - _mm_store_ps((y)+i+ 4, XMM5); \ - _mm_store_ps((y)+i+ 8, XMM6); \ - _mm_store_ps((y)+i+12, XMM7); \ - } \ -} - - - -#if 3 <= __SSE__ || defined(__SSE3__) -/* - Horizontal add with haddps SSE3 instruction. The work register (rw) - is unused. - */ -#define __horizontal_sum(r, rw) \ - r = _mm_hadd_ps(r, r); \ - r = _mm_hadd_ps(r, r); - -#else -/* - Horizontal add with SSE instruction. The work register (rw) is used. - */ -#define __horizontal_sum(r, rw) \ - rw = r; \ - r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ - r = _mm_add_ps(r, rw); \ - rw = r; \ - r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ - r = _mm_add_ps(r, rw); - -#endif - -#define vecdot(s, x, y, n) \ -{ \ - int i; \ - __m128 XMM0 = _mm_setzero_ps(); \ - __m128 XMM1 = _mm_setzero_ps(); \ - __m128 XMM2, XMM3, XMM4, XMM5; \ - for (i = 0;i < (n);i += 8) { \ - XMM2 = _mm_load_ps((x)+i ); \ - XMM3 = _mm_load_ps((x)+i+4); \ - XMM4 = _mm_load_ps((y)+i ); \ - XMM5 = _mm_load_ps((y)+i+4); \ - XMM2 = _mm_mul_ps(XMM2, XMM4); \ - XMM3 = _mm_mul_ps(XMM3, XMM5); \ - XMM0 = _mm_add_ps(XMM0, XMM2); \ - XMM1 = _mm_add_ps(XMM1, XMM3); \ - } \ - XMM0 = _mm_add_ps(XMM0, XMM1); \ - __horizontal_sum(XMM0, XMM1); \ - _mm_store_ss((s), XMM0); \ -} - -#define vec2norm(s, x, n) \ -{ \ - int i; \ - __m128 XMM0 = _mm_setzero_ps(); \ - __m128 XMM1 = _mm_setzero_ps(); \ - __m128 XMM2, XMM3; \ - for (i = 0;i < (n);i += 8) { \ - XMM2 = _mm_load_ps((x)+i ); \ - XMM3 = _mm_load_ps((x)+i+4); \ - XMM2 = _mm_mul_ps(XMM2, XMM2); \ - XMM3 = _mm_mul_ps(XMM3, XMM3); \ - XMM0 = _mm_add_ps(XMM0, XMM2); \ - XMM1 = _mm_add_ps(XMM1, XMM3); \ - } \ - XMM0 = _mm_add_ps(XMM0, XMM1); \ - __horizontal_sum(XMM0, XMM1); \ - XMM2 = XMM0; \ - XMM1 = _mm_rsqrt_ss(XMM0); \ - XMM3 = XMM1; \ - XMM1 = _mm_mul_ss(XMM1, XMM1); \ - XMM1 = _mm_mul_ss(XMM1, XMM3); \ - XMM1 = _mm_mul_ss(XMM1, XMM0); \ - XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ - XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ - XMM3 = _mm_add_ss(XMM3, XMM1); \ - XMM3 = _mm_mul_ss(XMM3, XMM2); \ - _mm_store_ss((s), XMM3); \ -} - -#define vec2norminv(s, x, n) \ -{ \ - int i; \ - __m128 XMM0 = _mm_setzero_ps(); \ - __m128 XMM1 = _mm_setzero_ps(); \ - __m128 XMM2, XMM3; \ - for (i = 0;i < (n);i += 16) { \ - XMM2 = _mm_load_ps((x)+i ); \ - XMM3 = _mm_load_ps((x)+i+4); \ - XMM2 = _mm_mul_ps(XMM2, XMM2); \ - XMM3 = _mm_mul_ps(XMM3, XMM3); \ - XMM0 = _mm_add_ps(XMM0, XMM2); \ - XMM1 = _mm_add_ps(XMM1, XMM3); \ - } \ - XMM0 = _mm_add_ps(XMM0, XMM1); \ - __horizontal_sum(XMM0, XMM1); \ - XMM2 = XMM0; \ - XMM1 = _mm_rsqrt_ss(XMM0); \ - XMM3 = XMM1; \ - XMM1 = _mm_mul_ss(XMM1, XMM1); \ - XMM1 = _mm_mul_ss(XMM1, XMM3); \ - XMM1 = _mm_mul_ss(XMM1, XMM0); \ - XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ - XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ - XMM3 = _mm_add_ss(XMM3, XMM1); \ - _mm_store_ss((s), XMM3); \ -} diff --git a/Modules/ThirdParty/libLBFGS/src/lbfgs.c b/Modules/ThirdParty/libLBFGS/src/lbfgs.c deleted file mode 100644 index 7bf006b6f54..00000000000 --- a/Modules/ThirdParty/libLBFGS/src/lbfgs.c +++ /dev/null @@ -1,1371 +0,0 @@ -/* - * Limited memory BFGS (L-BFGS). - * - * Copyright (c) 1990, Jorge Nocedal - * Copyright (c) 2007-2010 Naoaki Okazaki - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -/* $Id$ */ - -/* -This library is a C port of the FORTRAN implementation of Limited-memory -Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. -The original FORTRAN source code is available at: -http://www.ece.northwestern.edu/~nocedal/lbfgs.html - -The L-BFGS algorithm is described in: - - Jorge Nocedal. - Updating Quasi-Newton Matrices with Limited Storage. - Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. - - Dong C. Liu and Jorge Nocedal. - On the limited memory BFGS method for large scale optimization. - Mathematical Programming B, Vol. 45, No. 3, pp. 503-528, 1989. - -The line search algorithms used in this implementation are described in: - - John E. Dennis and Robert B. Schnabel. - Numerical Methods for Unconstrained Optimization and Nonlinear - Equations, Englewood Cliffs, 1983. - - Jorge J. More and David J. Thuente. - Line search algorithm with guaranteed sufficient decrease. - ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, - pp. 286-307, 1994. - -This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) -method presented in: - - Galen Andrew and Jianfeng Gao. - Scalable training of L1-regularized log-linear models. - In Proceedings of the 24th International Conference on Machine - Learning (ICML 2007), pp. 33-40, 2007. - -I would like to thank the original author, Jorge Nocedal, who has been -distributing the effieicnt and explanatory implementation in an open source -licence. -*/ - -#ifdef HAVE_CONFIG_H -#include -#endif/*HAVE_CONFIG_H*/ - -//#include -#include -#include -#include - -#include - -#ifdef _MSC_VER -#define inline __inline -#endif/*_MSC_VER*/ - -#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64 -/* Use SSE2 optimization for 64bit double precision. */ -#include "arithmetic_sse_double.h" - -#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32 -/* Use SSE optimization for 32bit float precision. */ -#include "arithmetic_sse_float.h" - -#else -/* No CPU specific optimization. */ -#include "arithmetic_ansi.h" - -#endif - -#define min2(a, b) ((a) <= (b) ? (a) : (b)) -#define max2(a, b) ((a) >= (b) ? (a) : (b)) -#define max3(a, b, c) max2(max2((a), (b)), (c)); - -struct tag_callback_data { - int n; - void *instance; - lbfgs_evaluate_t proc_evaluate; - lbfgs_progress_t proc_progress; -}; -typedef struct tag_callback_data callback_data_t; - -struct tag_iteration_data { - lbfgsfloatval_t alpha; - lbfgsfloatval_t *s; /* [n] */ - lbfgsfloatval_t *y; /* [n] */ - lbfgsfloatval_t ys; /* vecdot(y, s) */ -}; -typedef struct tag_iteration_data iteration_data_t; - -static const lbfgs_parameter_t _defparam = { - 6, 1e-5, 0, 1e-5, - 0, LBFGS_LINESEARCH_DEFAULT, 40, - 1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16, - 0.0, 0, -1, -}; - -/* Forward function declarations. */ - -typedef int (*line_search_proc)( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wa, - callback_data_t *cd, - const lbfgs_parameter_t *param - ); - -static int line_search_backtracking( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wa, - callback_data_t *cd, - const lbfgs_parameter_t *param - ); - -static int line_search_backtracking_owlqn( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wp, - callback_data_t *cd, - const lbfgs_parameter_t *param - ); - -static int line_search_morethuente( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wa, - callback_data_t *cd, - const lbfgs_parameter_t *param - ); - -static int update_trial_interval( - lbfgsfloatval_t *x, - lbfgsfloatval_t *fx, - lbfgsfloatval_t *dx, - lbfgsfloatval_t *y, - lbfgsfloatval_t *fy, - lbfgsfloatval_t *dy, - lbfgsfloatval_t *t, - lbfgsfloatval_t *ft, - lbfgsfloatval_t *dt, - const lbfgsfloatval_t tmin, - const lbfgsfloatval_t tmax, - int *brackt - ); - -static lbfgsfloatval_t owlqn_x1norm( - const lbfgsfloatval_t* x, - const int start, - const int n - ); - -static void owlqn_pseudo_gradient( - lbfgsfloatval_t* pg, - const lbfgsfloatval_t* x, - const lbfgsfloatval_t* g, - const int n, - const lbfgsfloatval_t c, - const int start, - const int end - ); - -static void owlqn_project( - lbfgsfloatval_t* d, - const lbfgsfloatval_t* sign, - const int start, - const int end - ); - - -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) -static int round_out_variables(int n) -{ - n += 7; - n /= 8; - n *= 8; - return n; -} -#endif/*defined(USE_SSE)*/ - -lbfgsfloatval_t* lbfgs_malloc(int n) -{ -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) - n = round_out_variables(n); -#endif/*defined(USE_SSE)*/ - return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n); -} - -void lbfgs_free(lbfgsfloatval_t *x) -{ - vecfree(x); -} - -void lbfgs_parameter_init(lbfgs_parameter_t *param) -{ - memcpy(param, &_defparam, sizeof(*param)); -} - -int lbfgs( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *ptr_fx, - lbfgs_evaluate_t proc_evaluate, - lbfgs_progress_t proc_progress, - void *instance, - lbfgs_parameter_t *_param - ) -{ - int ret; - int i, j, k, ls, end, bound; - lbfgsfloatval_t step; - - /* Constant parameters and their default values. */ - lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; - const int m = param.m; - - lbfgsfloatval_t *xp = NULL; - lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; - lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; - iteration_data_t *lm = NULL, *it = NULL; - lbfgsfloatval_t ys, yy; - lbfgsfloatval_t xnorm, gnorm, beta; - lbfgsfloatval_t fx = 0.; - lbfgsfloatval_t rate = 0.; - line_search_proc linesearch = line_search_morethuente; - - /* Construct a callback data. */ - callback_data_t cd; - cd.n = n; - cd.instance = instance; - cd.proc_evaluate = proc_evaluate; - cd.proc_progress = proc_progress; - -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) - /* Round out the number of variables. */ - n = round_out_variables(n); -#endif/*defined(USE_SSE)*/ - - /* Check the input parameters for errors. */ - if (n <= 0) { - return LBFGSERR_INVALID_N; - } -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) - if (n % 8 != 0) { - return LBFGSERR_INVALID_N_SSE; - } - if ((uintptr_t)(const void*)x % 16 != 0) { - return LBFGSERR_INVALID_X_SSE; - } -#endif/*defined(USE_SSE)*/ - if (param.epsilon < 0.) { - return LBFGSERR_INVALID_EPSILON; - } - if (param.past < 0) { - return LBFGSERR_INVALID_TESTPERIOD; - } - if (param.delta < 0.) { - return LBFGSERR_INVALID_DELTA; - } - if (param.min_step < 0.) { - return LBFGSERR_INVALID_MINSTEP; - } - if (param.max_step < param.min_step) { - return LBFGSERR_INVALID_MAXSTEP; - } - if (param.ftol < 0.) { - return LBFGSERR_INVALID_FTOL; - } - if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || - param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { - if (param.wolfe <= param.ftol || 1. <= param.wolfe) { - return LBFGSERR_INVALID_WOLFE; - } - } - if (param.gtol < 0.) { - return LBFGSERR_INVALID_GTOL; - } - if (param.xtol < 0.) { - return LBFGSERR_INVALID_XTOL; - } - if (param.max_linesearch <= 0) { - return LBFGSERR_INVALID_MAXLINESEARCH; - } - if (param.orthantwise_c < 0.) { - return LBFGSERR_INVALID_ORTHANTWISE; - } - if (param.orthantwise_start < 0 || n < param.orthantwise_start) { - return LBFGSERR_INVALID_ORTHANTWISE_START; - } - if (param.orthantwise_end < 0) { - param.orthantwise_end = n; - } - if (n < param.orthantwise_end) { - return LBFGSERR_INVALID_ORTHANTWISE_END; - } - if (param.orthantwise_c != 0.) { - switch (param.linesearch) { - case LBFGS_LINESEARCH_BACKTRACKING: - linesearch = line_search_backtracking_owlqn; - break; - default: - /* Only the backtracking method is available. */ - return LBFGSERR_INVALID_LINESEARCH; - } - } else { - switch (param.linesearch) { - case LBFGS_LINESEARCH_MORETHUENTE: - linesearch = line_search_morethuente; - break; - case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: - case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: - case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: - linesearch = line_search_backtracking; - break; - default: - return LBFGSERR_INVALID_LINESEARCH; - } - } - - /* Allocate working space. */ - xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { - ret = LBFGSERR_OUTOFMEMORY; - goto lbfgs_exit; - } - - if (param.orthantwise_c != 0.) { - /* Allocate working space for OW-LQN. */ - pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - if (pg == NULL) { - ret = LBFGSERR_OUTOFMEMORY; - goto lbfgs_exit; - } - } - - /* Allocate limited memory storage. */ - lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t)); - if (lm == NULL) { - ret = LBFGSERR_OUTOFMEMORY; - goto lbfgs_exit; - } - - /* Initialize the limited memory. */ - for (i = 0;i < m;++i) { - it = &lm[i]; - it->alpha = 0; - it->ys = 0; - it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - if (it->s == NULL || it->y == NULL) { - ret = LBFGSERR_OUTOFMEMORY; - goto lbfgs_exit; - } - } - - /* Allocate an array for storing previous values of the objective function. */ - if (0 < param.past) { - pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t)); - } - - /* Evaluate the function value and its gradient. */ - fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); - if (0. != param.orthantwise_c) { - /* Compute the L1 norm of the variable and add it to the object value. */ - xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); - fx += xnorm * param.orthantwise_c; - owlqn_pseudo_gradient( - pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end - ); - } - - /* Store the initial value of the objective function. */ - if (pf != NULL) { - pf[0] = fx; - } - - /* - Compute the direction; - we assume the initial hessian matrix H_0 as the identity matrix. - */ - if (param.orthantwise_c == 0.) { - vecncpy(d, g, n); - } else { - vecncpy(d, pg, n); - } - - /* - Make sure that the initial variables are not a minimizer. - */ - vec2norm(&xnorm, x, n); - if (param.orthantwise_c == 0.) { - vec2norm(&gnorm, g, n); - } else { - vec2norm(&gnorm, pg, n); - } - if (xnorm < 1.0) xnorm = 1.0; - if (gnorm / xnorm <= param.epsilon) { - ret = LBFGS_ALREADY_MINIMIZED; - goto lbfgs_exit; - } - - /* Compute the initial step: - step = 1.0 / sqrt(vecdot(d, d, n)) - */ - vec2norminv(&step, d, n); - - k = 1; - end = 0; - for (;;) { - /* Store the current position and gradient vectors. */ - veccpy(xp, x, n); - veccpy(gp, g, n); - - /* Search for an optimal step. */ - if (param.orthantwise_c == 0.) { - ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); - } else { - ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); - owlqn_pseudo_gradient( - pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end - ); - } - if (ls < 0) { - /* Revert to the previous point. */ - veccpy(x, xp, n); - veccpy(g, gp, n); - ret = ls; - goto lbfgs_exit; - } - - /* Compute x and g norms. */ - vec2norm(&xnorm, x, n); - if (param.orthantwise_c == 0.) { - vec2norm(&gnorm, g, n); - } else { - vec2norm(&gnorm, pg, n); - } - - /* Report the progress. */ - if (cd.proc_progress) { - if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { - goto lbfgs_exit; - } - } - - /* - Convergence test. - The criterion is given by the following formula: - |g(x)| / \max(1, |x|) < \epsilon - */ - if (xnorm < 1.0) xnorm = 1.0; - if (gnorm / xnorm <= param.epsilon) { - /* Convergence. */ - ret = LBFGS_SUCCESS; - break; - } - - /* - Test for stopping criterion. - The criterion is given by the following formula: - (f(past_x) - f(x)) / f(x) < \delta - */ - if (pf != NULL) { - /* We don't test the stopping criterion while k < past. */ - if (param.past <= k) { - /* Compute the relative improvement from the past. */ - rate = (pf[k % param.past] - fx) / fx; - - /* The stopping criterion. */ - if (rate < param.delta) { - ret = LBFGS_STOP; - break; - } - } - - /* Store the current value of the objective function. */ - pf[k % param.past] = fx; - } - - if (param.max_iterations != 0 && param.max_iterations < k+1) { - /* Maximum number of iterations. */ - ret = LBFGSERR_MAXIMUMITERATION; - break; - } - - /* - Update vectors s and y: - s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. - y_{k+1} = g_{k+1} - g_{k}. - */ - it = &lm[end]; - vecdiff(it->s, x, xp, n); - vecdiff(it->y, g, gp, n); - - /* - Compute scalars ys and yy: - ys = y^t \cdot s = 1 / \rho. - yy = y^t \cdot y. - Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). - */ - vecdot(&ys, it->y, it->s, n); - vecdot(&yy, it->y, it->y, n); - it->ys = ys; - - /* - Recursive formula to compute dir = -(H \cdot g). - This is described in page 779 of: - Jorge Nocedal. - Updating Quasi-Newton Matrices with Limited Storage. - Mathematics of Computation, Vol. 35, No. 151, - pp. 773--782, 1980. - */ - bound = (m <= k) ? m : k; - ++k; - end = (end + 1) % m; - - /* Compute the steepest direction. */ - if (param.orthantwise_c == 0.) { - /* Compute the negative of gradients. */ - vecncpy(d, g, n); - } else { - vecncpy(d, pg, n); - } - - j = end; - for (i = 0;i < bound;++i) { - j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ - it = &lm[j]; - /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ - vecdot(&it->alpha, it->s, d, n); - it->alpha /= it->ys; - /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ - vecadd(d, it->y, -it->alpha, n); - } - - vecscale(d, ys / yy, n); - - for (i = 0;i < bound;++i) { - it = &lm[j]; - /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ - vecdot(&beta, it->y, d, n); - beta /= it->ys; - /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ - vecadd(d, it->s, it->alpha - beta, n); - j = (j + 1) % m; /* if (++j == m) j = 0; */ - } - - /* - Constrain the search direction for orthant-wise updates. - */ - if (param.orthantwise_c != 0.) { - for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { - if (d[i] * pg[i] >= 0) { - d[i] = 0; - } - } - } - - /* - Now the search direction d is ready. We try step = 1 first. - */ - step = 1.0; - } - -lbfgs_exit: - /* Return the final value of the objective function. */ - if (ptr_fx != NULL) { - *ptr_fx = fx; - } - - vecfree(pf); - - /* Free memory blocks used by this function. */ - if (lm != NULL) { - for (i = 0;i < m;++i) { - vecfree(lm[i].s); - vecfree(lm[i].y); - } - vecfree(lm); - } - vecfree(pg); - vecfree(w); - vecfree(d); - vecfree(gp); - vecfree(g); - vecfree(xp); - - return ret; -} - - - -static int line_search_backtracking( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wp, - callback_data_t *cd, - const lbfgs_parameter_t *param - ) -{ - int count = 0; - lbfgsfloatval_t width, dg; - lbfgsfloatval_t finit, dginit = 0., dgtest; - const lbfgsfloatval_t dec = 0.5, inc = 2.1; - - /* Check the input parameters for errors. */ - if (*stp <= 0.) { - return LBFGSERR_INVALIDPARAMETERS; - } - - /* Compute the initial gradient in the search direction. */ - vecdot(&dginit, g, s, n); - - /* Make sure that s points to a descent direction. */ - if (0 < dginit) { - return LBFGSERR_INCREASEGRADIENT; - } - - /* The initial value of the objective function. */ - finit = *f; - dgtest = param->ftol * dginit; - - for (;;) { - veccpy(x, xp, n); - vecadd(x, s, *stp, n); - - /* Evaluate the function and gradient values. */ - *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); - - ++count; - - if (*f > finit + *stp * dgtest) { - width = dec; - } else { - /* The sufficient decrease condition (Armijo condition). */ - if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) { - /* Exit with the Armijo condition. */ - return count; - } - - /* Check the Wolfe condition. */ - vecdot(&dg, g, s, n); - if (dg < param->wolfe * dginit) { - width = inc; - } else { - if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) { - /* Exit with the regular Wolfe condition. */ - return count; - } - - /* Check the strong Wolfe condition. */ - if(dg > -param->wolfe * dginit) { - width = dec; - } else { - /* Exit with the strong Wolfe condition. */ - return count; - } - } - } - - if (*stp < param->min_step) { - /* The step is the minimum value. */ - return LBFGSERR_MINIMUMSTEP; - } - if (*stp > param->max_step) { - /* The step is the maximum value. */ - return LBFGSERR_MAXIMUMSTEP; - } - if (param->max_linesearch <= count) { - /* Maximum number of iteration. */ - return LBFGSERR_MAXIMUMLINESEARCH; - } - - (*stp) *= width; - } -} - - - -static int line_search_backtracking_owlqn( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wp, - callback_data_t *cd, - const lbfgs_parameter_t *param - ) -{ - int i, count = 0; - lbfgsfloatval_t width = 0.5, norm = 0.; - lbfgsfloatval_t finit = *f, dgtest; - - /* Check the input parameters for errors. */ - if (*stp <= 0.) { - return LBFGSERR_INVALIDPARAMETERS; - } - - /* Choose the orthant for the new point. */ - for (i = 0;i < n;++i) { - wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i]; - } - - for (;;) { - /* Update the current point. */ - veccpy(x, xp, n); - vecadd(x, s, *stp, n); - - /* The current point is projected onto the orthant. */ - owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end); - - /* Evaluate the function and gradient values. */ - *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); - - /* Compute the L1 norm of the variables and add it to the object value. */ - norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end); - *f += norm * param->orthantwise_c; - - ++count; - - dgtest = 0.; - for (i = 0;i < n;++i) { - dgtest += (x[i] - xp[i]) * gp[i]; - } - - if (*f <= finit + param->ftol * dgtest) { - /* The sufficient decrease condition. */ - return count; - } - - if (*stp < param->min_step) { - /* The step is the minimum value. */ - return LBFGSERR_MINIMUMSTEP; - } - if (*stp > param->max_step) { - /* The step is the maximum value. */ - return LBFGSERR_MAXIMUMSTEP; - } - if (param->max_linesearch <= count) { - /* Maximum number of iteration. */ - return LBFGSERR_MAXIMUMLINESEARCH; - } - - (*stp) *= width; - } -} - - - -static int line_search_morethuente( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *f, - lbfgsfloatval_t *g, - lbfgsfloatval_t *s, - lbfgsfloatval_t *stp, - const lbfgsfloatval_t* xp, - const lbfgsfloatval_t* gp, - lbfgsfloatval_t *wa, - callback_data_t *cd, - const lbfgs_parameter_t *param - ) -{ - int count = 0; - int brackt, stage1, uinfo = 0; - lbfgsfloatval_t dg; - lbfgsfloatval_t stx, fx, dgx; - lbfgsfloatval_t sty, fy, dgy; - lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm; - lbfgsfloatval_t finit, ftest1, dginit, dgtest; - lbfgsfloatval_t width, prev_width; - lbfgsfloatval_t stmin, stmax; - - /* Check the input parameters for errors. */ - if (*stp <= 0.) { - return LBFGSERR_INVALIDPARAMETERS; - } - - /* Compute the initial gradient in the search direction. */ - vecdot(&dginit, g, s, n); - - /* Make sure that s points to a descent direction. */ - if (0 < dginit) { - return LBFGSERR_INCREASEGRADIENT; - } - - /* Initialize local variables. */ - brackt = 0; - stage1 = 1; - finit = *f; - dgtest = param->ftol * dginit; - width = param->max_step - param->min_step; - prev_width = 2.0 * width; - - /* - The variables stx, fx, dgx contain the values of the step, - function, and directional derivative at the best step. - The variables sty, fy, dgy contain the value of the step, - function, and derivative at the other endpoint of - the interval of uncertainty. - The variables stp, f, dg contain the values of the step, - function, and derivative at the current step. - */ - stx = sty = 0.; - fx = fy = finit; - dgx = dgy = dginit; - - for (;;) { - /* - Set the minimum and maximum steps to correspond to the - present interval of uncertainty. - */ - if (brackt) { - stmin = min2(stx, sty); - stmax = max2(stx, sty); - } else { - stmin = stx; - stmax = *stp + 4.0 * (*stp - stx); - } - - /* Clip the step in the range of [stpmin, stpmax]. */ - if (*stp < param->min_step) *stp = param->min_step; - if (param->max_step < *stp) *stp = param->max_step; - - /* - If an unusual termination is to occur then let - stp be the lowest point obtained so far. - */ - if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) { - *stp = stx; - } - - /* - Compute the current value of x: - x <- x + (*stp) * s. - */ - veccpy(x, xp, n); - vecadd(x, s, *stp, n); - - /* Evaluate the function and gradient values. */ - *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); - vecdot(&dg, g, s, n); - - ftest1 = finit + *stp * dgtest; - ++count; - - /* Test for errors and convergence. */ - if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) { - /* Rounding errors prevent further progress. */ - return LBFGSERR_ROUNDING_ERROR; - } - if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) { - /* The step is the maximum value. */ - return LBFGSERR_MAXIMUMSTEP; - } - if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) { - /* The step is the minimum value. */ - return LBFGSERR_MINIMUMSTEP; - } - if (brackt && (stmax - stmin) <= param->xtol * stmax) { - /* Relative width of the interval of uncertainty is at most xtol. */ - return LBFGSERR_WIDTHTOOSMALL; - } - if (param->max_linesearch <= count) { - /* Maximum number of iteration. */ - return LBFGSERR_MAXIMUMLINESEARCH; - } - if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) { - /* The sufficient decrease condition and the directional derivative condition hold. */ - return count; - } - - /* - In the first stage we seek a step for which the modified - function has a nonpositive value and nonnegative derivative. - */ - if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) { - stage1 = 0; - } - - /* - A modified function is used to predict the step only if - we have not obtained a step for which the modified - function has a nonpositive function value and nonnegative - derivative, and if a lower function value has been - obtained but the decrease is not sufficient. - */ - if (stage1 && ftest1 < *f && *f <= fx) { - /* Define the modified function and derivative values. */ - fm = *f - *stp * dgtest; - fxm = fx - stx * dgtest; - fym = fy - sty * dgtest; - dgm = dg - dgtest; - dgxm = dgx - dgtest; - dgym = dgy - dgtest; - - /* - Call update_trial_interval() to update the interval of - uncertainty and to compute the new step. - */ - uinfo = update_trial_interval( - &stx, &fxm, &dgxm, - &sty, &fym, &dgym, - stp, &fm, &dgm, - stmin, stmax, &brackt - ); - - /* Reset the function and gradient values for f. */ - fx = fxm + stx * dgtest; - fy = fym + sty * dgtest; - dgx = dgxm + dgtest; - dgy = dgym + dgtest; - } else { - /* - Call update_trial_interval() to update the interval of - uncertainty and to compute the new step. - */ - uinfo = update_trial_interval( - &stx, &fx, &dgx, - &sty, &fy, &dgy, - stp, f, &dg, - stmin, stmax, &brackt - ); - } - - /* - Force a sufficient decrease in the interval of uncertainty. - */ - if (brackt) { - if (0.66 * prev_width <= fabs(sty - stx)) { - *stp = stx + 0.5 * (sty - stx); - } - prev_width = width; - width = fabs(sty - stx); - } - } - - return LBFGSERR_LOGICERROR; -} - - - -/** - * Define the local variables for computing minimizers. - */ -#define USES_MINIMIZER \ - lbfgsfloatval_t a, d, gamma, theta, p, q, r, s; - -/** - * Find a minimizer of an interpolated cubic function. - * @param cm The minimizer of the interpolated cubic. - * @param u The value of one point, u. - * @param fu The value of f(u). - * @param du The value of f'(u). - * @param v The value of another point, v. - * @param fv The value of f(v). - * @param du The value of f'(v). - */ -#define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \ - d = (v) - (u); \ - theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ - p = fabs(theta); \ - q = fabs(du); \ - r = fabs(dv); \ - s = max3(p, q, r); \ - /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ - a = theta / s; \ - gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \ - if ((v) < (u)) gamma = -gamma; \ - p = gamma - (du) + theta; \ - q = gamma - (du) + gamma + (dv); \ - r = p / q; \ - (cm) = (u) + r * d; - -/** - * Find a minimizer of an interpolated cubic function. - * @param cm The minimizer of the interpolated cubic. - * @param u The value of one point, u. - * @param fu The value of f(u). - * @param du The value of f'(u). - * @param v The value of another point, v. - * @param fv The value of f(v). - * @param du The value of f'(v). - * @param xmin The maximum value. - * @param xmin The minimum value. - */ -#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \ - d = (v) - (u); \ - theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ - p = fabs(theta); \ - q = fabs(du); \ - r = fabs(dv); \ - s = max3(p, q, r); \ - /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ - a = theta / s; \ - gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \ - if ((u) < (v)) gamma = -gamma; \ - p = gamma - (dv) + theta; \ - q = gamma - (dv) + gamma + (du); \ - r = p / q; \ - if (r < 0. && gamma != 0.) { \ - (cm) = (v) - r * d; \ - } else if (a < 0) { \ - (cm) = (xmax); \ - } else { \ - (cm) = (xmin); \ - } - -/** - * Find a minimizer of an interpolated quadratic function. - * @param qm The minimizer of the interpolated quadratic. - * @param u The value of one point, u. - * @param fu The value of f(u). - * @param du The value of f'(u). - * @param v The value of another point, v. - * @param fv The value of f(v). - */ -#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \ - a = (v) - (u); \ - (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a; - -/** - * Find a minimizer of an interpolated quadratic function. - * @param qm The minimizer of the interpolated quadratic. - * @param u The value of one point, u. - * @param du The value of f'(u). - * @param v The value of another point, v. - * @param dv The value of f'(v). - */ -#define QUARD_MINIMIZER2(qm, u, du, v, dv) \ - a = (u) - (v); \ - (qm) = (v) + (dv) / ((dv) - (du)) * a; - -/** - * Update a safeguarded trial value and interval for line search. - * - * The parameter x represents the step with the least function value. - * The parameter t represents the current step. This function assumes - * that the derivative at the point of x in the direction of the step. - * If the bracket is set to true, the minimizer has been bracketed in - * an interval of uncertainty with endpoints between x and y. - * - * @param x The pointer to the value of one endpoint. - * @param fx The pointer to the value of f(x). - * @param dx The pointer to the value of f'(x). - * @param y The pointer to the value of another endpoint. - * @param fy The pointer to the value of f(y). - * @param dy The pointer to the value of f'(y). - * @param t The pointer to the value of the trial value, t. - * @param ft The pointer to the value of f(t). - * @param dt The pointer to the value of f'(t). - * @param tmin The minimum value for the trial value, t. - * @param tmax The maximum value for the trial value, t. - * @param brackt The pointer to the predicate if the trial value is - * bracketed. - * @retval int Status value. Zero indicates a normal termination. - * - * @see - * Jorge J. More and David J. Thuente. Line search algorithm with - * guaranteed sufficient decrease. ACM Transactions on Mathematical - * Software (TOMS), Vol 20, No 3, pp. 286-307, 1994. - */ -static int update_trial_interval( - lbfgsfloatval_t *x, - lbfgsfloatval_t *fx, - lbfgsfloatval_t *dx, - lbfgsfloatval_t *y, - lbfgsfloatval_t *fy, - lbfgsfloatval_t *dy, - lbfgsfloatval_t *t, - lbfgsfloatval_t *ft, - lbfgsfloatval_t *dt, - const lbfgsfloatval_t tmin, - const lbfgsfloatval_t tmax, - int *brackt - ) -{ - int bound; - int dsign = fsigndiff(dt, dx); - lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */ - lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */ - lbfgsfloatval_t newt; /* new trial value. */ - USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */ - - /* Check the input parameters for errors. */ - if (*brackt) { - if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) { - /* The trival value t is out of the interval. */ - return LBFGSERR_OUTOFINTERVAL; - } - if (0. <= *dx * (*t - *x)) { - /* The function must decrease from x. */ - return LBFGSERR_INCREASEGRADIENT; - } - if (tmax < tmin) { - /* Incorrect tmin and tmax specified. */ - return LBFGSERR_INCORRECT_TMINMAX; - } - } - - /* - Trial value selection. - */ - if (*fx < *ft) { - /* - Case 1: a higher function value. - The minimum is brackt. If the cubic minimizer is closer - to x than the quadratic one, the cubic one is taken, else - the average of the minimizers is taken. - */ - *brackt = 1; - bound = 1; - CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); - QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft); - if (fabs(mc - *x) < fabs(mq - *x)) { - newt = mc; - } else { - newt = mc + 0.5 * (mq - mc); - } - } else if (dsign) { - /* - Case 2: a lower function value and derivatives of - opposite sign. The minimum is brackt. If the cubic - minimizer is closer to x than the quadratic (secant) one, - the cubic one is taken, else the quadratic one is taken. - */ - *brackt = 1; - bound = 0; - CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); - QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); - if (fabs(mc - *t) > fabs(mq - *t)) { - newt = mc; - } else { - newt = mq; - } - } else if (fabs(*dt) < fabs(*dx)) { - /* - Case 3: a lower function value, derivatives of the - same sign, and the magnitude of the derivative decreases. - The cubic minimizer is only used if the cubic tends to - infinity in the direction of the minimizer or if the minimum - of the cubic is beyond t. Otherwise the cubic minimizer is - defined to be either tmin or tmax. The quadratic (secant) - minimizer is also computed and if the minimum is brackt - then the the minimizer closest to x is taken, else the one - farthest away is taken. - */ - bound = 1; - CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax); - QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); - if (*brackt) { - if (fabs(*t - mc) < fabs(*t - mq)) { - newt = mc; - } else { - newt = mq; - } - } else { - if (fabs(*t - mc) > fabs(*t - mq)) { - newt = mc; - } else { - newt = mq; - } - } - } else { - /* - Case 4: a lower function value, derivatives of the - same sign, and the magnitude of the derivative does - not decrease. If the minimum is not brackt, the step - is either tmin or tmax, else the cubic minimizer is taken. - */ - bound = 0; - if (*brackt) { - CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy); - } else if (*x < *t) { - newt = tmax; - } else { - newt = tmin; - } - } - - /* - Update the interval of uncertainty. This update does not - depend on the new step or the case analysis above. - - - Case a: if f(x) < f(t), - x <- x, y <- t. - - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, - x <- t, y <- y. - - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, - x <- t, y <- x. - */ - if (*fx < *ft) { - /* Case a */ - *y = *t; - *fy = *ft; - *dy = *dt; - } else { - /* Case c */ - if (dsign) { - *y = *x; - *fy = *fx; - *dy = *dx; - } - /* Cases b and c */ - *x = *t; - *fx = *ft; - *dx = *dt; - } - - /* Clip the new trial value in [tmin, tmax]. */ - if (tmax < newt) newt = tmax; - if (newt < tmin) newt = tmin; - - /* - Redefine the new trial value if it is close to the upper bound - of the interval. - */ - if (*brackt && bound) { - mq = *x + 0.66 * (*y - *x); - if (*x < *y) { - if (mq < newt) newt = mq; - } else { - if (newt < mq) newt = mq; - } - } - - /* Return the new trial value. */ - *t = newt; - return 0; -} - - - - - -static lbfgsfloatval_t owlqn_x1norm( - const lbfgsfloatval_t* x, - const int start, - const int n - ) -{ - int i; - lbfgsfloatval_t norm = 0.; - - for (i = start;i < n;++i) { - norm += fabs(x[i]); - } - - return norm; -} - -static void owlqn_pseudo_gradient( - lbfgsfloatval_t* pg, - const lbfgsfloatval_t* x, - const lbfgsfloatval_t* g, - const int n, - const lbfgsfloatval_t c, - const int start, - const int end - ) -{ - int i; - - /* Compute the negative of gradients. */ - for (i = 0;i < start;++i) { - pg[i] = g[i]; - } - - /* Compute the psuedo-gradients. */ - for (i = start;i < end;++i) { - if (x[i] < 0.) { - /* Differentiable. */ - pg[i] = g[i] - c; - } else if (0. < x[i]) { - /* Differentiable. */ - pg[i] = g[i] + c; - } else { - if (g[i] < -c) { - /* Take the right partial derivative. */ - pg[i] = g[i] + c; - } else if (c < g[i]) { - /* Take the left partial derivative. */ - pg[i] = g[i] - c; - } else { - pg[i] = 0.; - } - } - } - - for (i = end;i < n;++i) { - pg[i] = g[i]; - } -} - -static void owlqn_project( - lbfgsfloatval_t* d, - const lbfgsfloatval_t* sign, - const int start, - const int end - ) -{ - int i; - - for (i = start;i < end;++i) { - if (d[i] * sign[i] <= 0) { - d[i] = 0; - } - } -} From 0c85dff828348edfa79bd195bbe5200262c64687 Mon Sep 17 00:00:00 2001 From: libLBFGS Upstream Date: Tue, 21 Dec 2010 01:16:22 +0900 Subject: [PATCH 3/4] libLBFGS 2010-12-21 (27e905f3) Code extracted from: https://github.com/chokkan/liblbfgs.git at commit 27e905f386bfc2123579c1686344f99b439709ea (v1.10). --- .gitattributes | 1 + COPYING | 22 + include/lbfgs.h | 745 +++++++++++++++++++ lib/arithmetic_ansi.h | 133 ++++ lib/arithmetic_sse_double.h | 294 ++++++++ lib/arithmetic_sse_float.h | 298 ++++++++ lib/lbfgs.c | 1371 +++++++++++++++++++++++++++++++++++ 7 files changed, 2864 insertions(+) create mode 100644 .gitattributes create mode 100644 COPYING create mode 100644 include/lbfgs.h create mode 100644 lib/arithmetic_ansi.h create mode 100644 lib/arithmetic_sse_double.h create mode 100644 lib/arithmetic_sse_float.h create mode 100644 lib/lbfgs.c diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..562b12e16eb --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* -whitespace diff --git a/COPYING b/COPYING new file mode 100644 index 00000000000..406cbe215a6 --- /dev/null +++ b/COPYING @@ -0,0 +1,22 @@ +The MIT License + +Copyright (c) 1990 Jorge Nocedal +Copyright (c) 2007-2010 Naoaki Okazaki + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/include/lbfgs.h b/include/lbfgs.h new file mode 100644 index 00000000000..cd944a3344f --- /dev/null +++ b/include/lbfgs.h @@ -0,0 +1,745 @@ +/* + * C library of Limited memory BFGS (L-BFGS). + * + * Copyright (c) 1990, Jorge Nocedal + * Copyright (c) 2007-2010 Naoaki Okazaki + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* $Id$ */ + +#ifndef __LBFGS_H__ +#define __LBFGS_H__ + +#ifdef __cplusplus +extern "C" { +#endif/*__cplusplus*/ + +/* + * The default precision of floating point values is 64bit (double). + */ +#ifndef LBFGS_FLOAT +#define LBFGS_FLOAT 64 +#endif/*LBFGS_FLOAT*/ + +/* + * Activate optimization routines for IEEE754 floating point values. + */ +#ifndef LBFGS_IEEE_FLOAT +#define LBFGS_IEEE_FLOAT 1 +#endif/*LBFGS_IEEE_FLOAT*/ + +#if LBFGS_FLOAT == 32 +typedef float lbfgsfloatval_t; + +#elif LBFGS_FLOAT == 64 +typedef double lbfgsfloatval_t; + +#else +#error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only." + +#endif + + +/** + * \addtogroup liblbfgs_api libLBFGS API + * @{ + * + * The libLBFGS API. + */ + +/** + * Return values of lbfgs(). + * + * Roughly speaking, a negative value indicates an error. + */ +enum { + /** L-BFGS reaches convergence. */ + LBFGS_SUCCESS = 0, + LBFGS_CONVERGENCE = 0, + LBFGS_STOP, + /** The initial variables already minimize the objective function. */ + LBFGS_ALREADY_MINIMIZED, + + /** Unknown error. */ + LBFGSERR_UNKNOWNERROR = -1024, + /** Logic error. */ + LBFGSERR_LOGICERROR, + /** Insufficient memory. */ + LBFGSERR_OUTOFMEMORY, + /** The minimization process has been canceled. */ + LBFGSERR_CANCELED, + /** Invalid number of variables specified. */ + LBFGSERR_INVALID_N, + /** Invalid number of variables (for SSE) specified. */ + LBFGSERR_INVALID_N_SSE, + /** The array x must be aligned to 16 (for SSE). */ + LBFGSERR_INVALID_X_SSE, + /** Invalid parameter lbfgs_parameter_t::epsilon specified. */ + LBFGSERR_INVALID_EPSILON, + /** Invalid parameter lbfgs_parameter_t::past specified. */ + LBFGSERR_INVALID_TESTPERIOD, + /** Invalid parameter lbfgs_parameter_t::delta specified. */ + LBFGSERR_INVALID_DELTA, + /** Invalid parameter lbfgs_parameter_t::linesearch specified. */ + LBFGSERR_INVALID_LINESEARCH, + /** Invalid parameter lbfgs_parameter_t::max_step specified. */ + LBFGSERR_INVALID_MINSTEP, + /** Invalid parameter lbfgs_parameter_t::max_step specified. */ + LBFGSERR_INVALID_MAXSTEP, + /** Invalid parameter lbfgs_parameter_t::ftol specified. */ + LBFGSERR_INVALID_FTOL, + /** Invalid parameter lbfgs_parameter_t::wolfe specified. */ + LBFGSERR_INVALID_WOLFE, + /** Invalid parameter lbfgs_parameter_t::gtol specified. */ + LBFGSERR_INVALID_GTOL, + /** Invalid parameter lbfgs_parameter_t::xtol specified. */ + LBFGSERR_INVALID_XTOL, + /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */ + LBFGSERR_INVALID_MAXLINESEARCH, + /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */ + LBFGSERR_INVALID_ORTHANTWISE, + /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */ + LBFGSERR_INVALID_ORTHANTWISE_START, + /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */ + LBFGSERR_INVALID_ORTHANTWISE_END, + /** The line-search step went out of the interval of uncertainty. */ + LBFGSERR_OUTOFINTERVAL, + /** A logic error occurred; alternatively, the interval of uncertainty + became too small. */ + LBFGSERR_INCORRECT_TMINMAX, + /** A rounding error occurred; alternatively, no line-search step + satisfies the sufficient decrease and curvature conditions. */ + LBFGSERR_ROUNDING_ERROR, + /** The line-search step became smaller than lbfgs_parameter_t::min_step. */ + LBFGSERR_MINIMUMSTEP, + /** The line-search step became larger than lbfgs_parameter_t::max_step. */ + LBFGSERR_MAXIMUMSTEP, + /** The line-search routine reaches the maximum number of evaluations. */ + LBFGSERR_MAXIMUMLINESEARCH, + /** The algorithm routine reaches the maximum number of iterations. */ + LBFGSERR_MAXIMUMITERATION, + /** Relative width of the interval of uncertainty is at most + lbfgs_parameter_t::xtol. */ + LBFGSERR_WIDTHTOOSMALL, + /** A logic error (negative line-search step) occurred. */ + LBFGSERR_INVALIDPARAMETERS, + /** The current search direction increases the objective function value. */ + LBFGSERR_INCREASEGRADIENT, +}; + +/** + * Line search algorithms. + */ +enum { + /** The default algorithm (MoreThuente method). */ + LBFGS_LINESEARCH_DEFAULT = 0, + /** MoreThuente method proposd by More and Thuente. */ + LBFGS_LINESEARCH_MORETHUENTE = 0, + /** + * Backtracking method with the Armijo condition. + * The backtracking method finds the step length such that it satisfies + * the sufficient decrease (Armijo) condition, + * - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, + /** The backtracking method with the defualt (regular Wolfe) condition. */ + LBFGS_LINESEARCH_BACKTRACKING = 2, + /** + * Backtracking method with regular Wolfe condition. + * The backtracking method finds the step length such that it satisfies + * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) + * and the curvature condition, + * - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, + /** + * Backtracking method with strong Wolfe condition. + * The backtracking method finds the step length such that it satisfies + * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) + * and the following condition, + * - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, +}; + +/** + * L-BFGS optimization parameters. + * Call lbfgs_parameter_init() function to initialize parameters to the + * default values. + */ +typedef struct { + /** + * The number of corrections to approximate the inverse hessian matrix. + * The L-BFGS routine stores the computation results of previous \ref m + * iterations to approximate the inverse hessian matrix of the current + * iteration. This parameter controls the size of the limited memories + * (corrections). The default value is \c 6. Values less than \c 3 are + * not recommended. Large values will result in excessive computing time. + */ + int m; + + /** + * Epsilon for convergence test. + * This parameter determines the accuracy with which the solution is to + * be found. A minimization terminates when + * ||g|| < \ref epsilon * max(1, ||x||), + * where ||.|| denotes the Euclidean (L2) norm. The default value is + * \c 1e-5. + */ + lbfgsfloatval_t epsilon; + + /** + * Distance for delta-based convergence test. + * This parameter determines the distance, in iterations, to compute + * the rate of decrease of the objective function. If the value of this + * parameter is zero, the library does not perform the delta-based + * convergence test. The default value is \c 0. + */ + int past; + + /** + * Delta for convergence test. + * This parameter determines the minimum rate of decrease of the + * objective function. The library stops iterations when the + * following condition is met: + * (f' - f) / f < \ref delta, + * where f' is the objective value of \ref past iterations ago, and f is + * the objective value of the current iteration. + * The default value is \c 0. + */ + lbfgsfloatval_t delta; + + /** + * The maximum number of iterations. + * The lbfgs() function terminates an optimization process with + * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count + * exceedes this parameter. Setting this parameter to zero continues an + * optimization process until a convergence or error. The default value + * is \c 0. + */ + int max_iterations; + + /** + * The line search algorithm. + * This parameter specifies a line search algorithm to be used by the + * L-BFGS routine. + */ + int linesearch; + + /** + * The maximum number of trials for the line search. + * This parameter controls the number of function and gradients evaluations + * per iteration for the line search routine. The default value is \c 20. + */ + int max_linesearch; + + /** + * The minimum step of the line search routine. + * The default value is \c 1e-20. This value need not be modified unless + * the exponents are too large for the machine being used, or unless the + * problem is extremely badly scaled (in which case the exponents should + * be increased). + */ + lbfgsfloatval_t min_step; + + /** + * The maximum step of the line search. + * The default value is \c 1e+20. This value need not be modified unless + * the exponents are too large for the machine being used, or unless the + * problem is extremely badly scaled (in which case the exponents should + * be increased). + */ + lbfgsfloatval_t max_step; + + /** + * A parameter to control the accuracy of the line search routine. + * The default value is \c 1e-4. This parameter should be greater + * than zero and smaller than \c 0.5. + */ + lbfgsfloatval_t ftol; + + /** + * A coefficient for the Wolfe condition. + * This parameter is valid only when the backtracking line-search + * algorithm is used with the Wolfe condition, + * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or + * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE . + * The default value is \c 0.9. This parameter should be greater + * the \ref ftol parameter and smaller than \c 1.0. + */ + lbfgsfloatval_t wolfe; + + /** + * A parameter to control the accuracy of the line search routine. + * The default value is \c 0.9. If the function and gradient + * evaluations are inexpensive with respect to the cost of the + * iteration (which is sometimes the case when solving very large + * problems) it may be advantageous to set this parameter to a small + * value. A typical small value is \c 0.1. This parameter shuold be + * greater than the \ref ftol parameter (\c 1e-4) and smaller than + * \c 1.0. + */ + lbfgsfloatval_t gtol; + + /** + * The machine precision for floating-point values. + * This parameter must be a positive value set by a client program to + * estimate the machine precision. The line search routine will terminate + * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width + * of the interval of uncertainty is less than this parameter. + */ + lbfgsfloatval_t xtol; + + /** + * Coeefficient for the L1 norm of variables. + * This parameter should be set to zero for standard minimization + * problems. Setting this parameter to a positive value activates + * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which + * minimizes the objective function F(x) combined with the L1 norm |x| + * of the variables, {F(x) + C |x|}. This parameter is the coeefficient + * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at + * zero, the library modifies function and gradient evaluations from + * a client program suitably; a client program thus have only to return + * the function value F(x) and gradients G(x) as usual. The default value + * is zero. + */ + lbfgsfloatval_t orthantwise_c; + + /** + * Start index for computing L1 norm of the variables. + * This parameter is valid only for OWL-QN method + * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N) + * specifies the index number from which the library computes the + * L1 norm of the variables x, + * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| . + * In other words, variables x_1, ..., x_{b-1} are not used for + * computing the L1 norm. Setting b (0 < b < N), one can protect + * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic + * regression) from being regularized. The default value is zero. + */ + int orthantwise_start; + + /** + * End index for computing L1 norm of the variables. + * This parameter is valid only for OWL-QN method + * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N) + * specifies the index number at which the library stops computing the + * L1 norm of the variables x, + */ + int orthantwise_end; +} lbfgs_parameter_t; + + +/** + * Callback interface to provide objective function and gradient evaluations. + * + * The lbfgs() function call this function to obtain the values of objective + * function and its gradients when needed. A client program must implement + * this function to evaluate the values of the objective function and its + * gradients, given current values of variables. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The gradient vector. The callback function must compute + * the gradient values for the current variables. + * @param n The number of variables. + * @param step The current step of the line search routine. + * @retval lbfgsfloatval_t The value of the objective function for the current + * variables. + */ +typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( + void *instance, + const lbfgsfloatval_t *x, + lbfgsfloatval_t *g, + const int n, + const lbfgsfloatval_t step + ); + +/** + * Callback interface to receive the progress of the optimization process. + * + * The lbfgs() function call this function for each iteration. Implementing + * this function, a client program can store or display the current progress + * of the optimization process. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The current gradient values of variables. + * @param fx The current value of the objective function. + * @param xnorm The Euclidean norm of the variables. + * @param gnorm The Euclidean norm of the gradients. + * @param step The line-search step used for this iteration. + * @param n The number of variables. + * @param k The iteration count. + * @param ls The number of evaluations called for this iteration. + * @retval int Zero to continue the optimization process. Returning a + * non-zero value will cancel the optimization process. + */ +typedef int (*lbfgs_progress_t)( + void *instance, + const lbfgsfloatval_t *x, + const lbfgsfloatval_t *g, + const lbfgsfloatval_t fx, + const lbfgsfloatval_t xnorm, + const lbfgsfloatval_t gnorm, + const lbfgsfloatval_t step, + int n, + int k, + int ls + ); + +/* +A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation +callback) and pass the pointer to the callback function to lbfgs() arguments. +Similarly, a user can implement a function compatible with ::lbfgs_progress_t +(progress callback) to obtain the current progress (e.g., variables, function +value, ||G||, etc) and to cancel the iteration process if necessary. +Implementation of a progress callback is optional: a user can pass \c NULL if +progress notification is not necessary. + +In addition, a user must preserve two requirements: + - The number of variables must be multiples of 16 (this is not 4). + - The memory block of variable array ::x must be aligned to 16. + +This algorithm terminates an optimization +when: + + ||G|| < \epsilon \cdot \max(1, ||x||) . + +In this formula, ||.|| denotes the Euclidean norm. +*/ + +/** + * Start a L-BFGS optimization. + * + * @param n The number of variables. + * @param x The array of variables. A client program can set + * default values for the optimization and receive the + * optimization result through this array. This array + * must be allocated by ::lbfgs_malloc function + * for libLBFGS built with SSE/SSE2 optimization routine + * enabled. The library built without SSE/SSE2 + * optimization does not have such a requirement. + * @param ptr_fx The pointer to the variable that receives the final + * value of the objective function for the variables. + * This argument can be set to \c NULL if the final + * value of the objective function is unnecessary. + * @param proc_evaluate The callback function to provide function and + * gradient evaluations given a current values of + * variables. A client program must implement a + * callback function compatible with \ref + * lbfgs_evaluate_t and pass the pointer to the + * callback function. + * @param proc_progress The callback function to receive the progress + * (the number of iterations, the current value of + * the objective function) of the minimization + * process. This argument can be set to \c NULL if + * a progress report is unnecessary. + * @param instance A user data for the client program. The callback + * functions will receive the value of this argument. + * @param param The pointer to a structure representing parameters for + * L-BFGS optimization. A client program can set this + * parameter to \c NULL to use the default parameters. + * Call lbfgs_parameter_init() function to fill a + * structure with the default values. + * @retval int The status code. This function returns zero if the + * minimization process terminates without an error. A + * non-zero value indicates an error. + */ +int lbfgs( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *ptr_fx, + lbfgs_evaluate_t proc_evaluate, + lbfgs_progress_t proc_progress, + void *instance, + lbfgs_parameter_t *param + ); + +/** + * Initialize L-BFGS parameters to the default values. + * + * Call this function to fill a parameter structure with the default values + * and overwrite parameter values if necessary. + * + * @param param The pointer to the parameter structure. + */ +void lbfgs_parameter_init(lbfgs_parameter_t *param); + +/** + * Allocate an array for variables. + * + * This function allocates an array of variables for the convenience of + * ::lbfgs function; the function has a requreiemt for a variable array + * when libLBFGS is built with SSE/SSE2 optimization routines. A user does + * not have to use this function for libLBFGS built without SSE/SSE2 + * optimization. + * + * @param n The number of variables. + */ +lbfgsfloatval_t* lbfgs_malloc(int n); + +/** + * Free an array of variables. + * + * @param x The array of variables allocated by ::lbfgs_malloc + * function. + */ +void lbfgs_free(lbfgsfloatval_t *x); + +/** @} */ + +#ifdef __cplusplus +} +#endif/*__cplusplus*/ + + + +/** +@mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) + +@section intro Introduction + +This library is a C port of the implementation of Limited-memory +Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. +The original FORTRAN source code is available at: +http://www.ece.northwestern.edu/~nocedal/lbfgs.html + +The L-BFGS method solves the unconstrainted minimization problem, + +
+    minimize F(x), x = (x1, x2, ..., xN),
+
+ +only if the objective function F(x) and its gradient G(x) are computable. The +well-known Newton's method requires computation of the inverse of the hessian +matrix of the objective function. However, the computational cost for the +inverse hessian matrix is expensive especially when the objective function +takes a large number of variables. The L-BFGS method iteratively finds a +minimizer by approximating the inverse hessian matrix by information from last +m iterations. This innovation saves the memory storage and computational time +drastically for large-scaled problems. + +Among the various ports of L-BFGS, this library provides several features: +- Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton + (OWL-QN) method): + In addition to standard minimization problems, the library can minimize + a function F(x) combined with L1-norm |x| of the variables, + {F(x) + C |x|}, where C is a constant scalar parameter. This feature is + useful for estimating parameters of sparse log-linear models (e.g., + logistic regression and maximum entropy) with L1-regularization (or + Laplacian prior). +- Clean C code: + Unlike C codes generated automatically by f2c (Fortran 77 into C converter), + this port includes changes based on my interpretations, improvements, + optimizations, and clean-ups so that the ported code would be well-suited + for a C code. In addition to comments inherited from the original code, + a number of comments were added through my interpretations. +- Callback interface: + The library receives function and gradient values via a callback interface. + The library also notifies the progress of the optimization by invoking a + callback function. In the original implementation, a user had to set + function and gradient values every time the function returns for obtaining + updated values. +- Thread safe: + The library is thread-safe, which is the secondary gain from the callback + interface. +- Cross platform. The source code can be compiled on Microsoft Visual + Studio 2010, GNU C Compiler (gcc), etc. +- Configurable precision: A user can choose single-precision (float) + or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro. +- SSE/SSE2 optimization: + This library includes SSE/SSE2 optimization (written in compiler intrinsics) + for vector arithmetic operations on Intel/AMD processors. The library uses + SSE for float values and SSE2 for double values. The SSE/SSE2 optimization + routine is disabled by default. + +This library is used by: +- CRFsuite: A fast implementation of Conditional Random Fields (CRFs) +- Classias: A collection of machine-learning algorithms for classification +- mlegp: an R package for maximum likelihood estimates for Gaussian processes +- imaging2: the imaging2 class library +- Algorithm::LBFGS - Perl extension for L-BFGS +- YAP-LBFGS (an interface to call libLBFGS from YAP Prolog) + +@section download Download + +- Source code +- GitHub repository + +libLBFGS is distributed under the term of the +MIT license. + +@section changelog History +- Version 1.10 (2010-12-22): + - Fixed compiling errors on Mac OS X; this patch was kindly submitted by + Nic Schraudolph. + - Reduced compiling warnings on Mac OS X; this patch was kindly submitted + by Tamas Nepusz. + - Replaced memalign() with posix_memalign(). + - Updated solution and project files for Microsoft Visual Studio 2010. +- Version 1.9 (2010-01-29): + - Fixed a mistake in checking the validity of the parameters "ftol" and + "wolfe"; this was discovered by Kevin S. Van Horn. +- Version 1.8 (2009-07-13): + - Accepted the patch submitted by Takashi Imamichi; + the backtracking method now has three criteria for choosing the step + length: + - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo) + condition only + - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition + (sufficient decrease condition + curvature condition) + - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition + - Updated the documentation to explain the above three criteria. +- Version 1.7 (2009-02-28): + - Improved OWL-QN routines for stability. + - Removed the support of OWL-QN method in MoreThuente algorithm because + it accidentally fails in early stages of iterations for some objectives. + Because of this change, the OW-LQN method must be used with the + backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING), or the + library returns ::LBFGSERR_INVALID_LINESEARCH. + - Renamed line search algorithms as follows: + - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition. + - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition. + - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition. + - Source code clean-up. +- Version 1.6 (2008-11-02): + - Improved line-search algorithm with strong Wolfe condition, which was + contributed by Takashi Imamichi. This routine is now default for + ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm + with regular Wolfe condition is still available as + ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE. + - Configurable stop index for L1-norm computation. A member variable + ::lbfgs_parameter_t::orthantwise_end was added to specify the index + number at which the library stops computing the L1 norm of the + variables. This is useful to prevent some variables from being + regularized by the OW-LQN method. + - A sample program written in C++ (sample/sample.cpp). +- Version 1.5 (2008-07-10): + - Configurable starting index for L1-norm computation. A member variable + ::lbfgs_parameter_t::orthantwise_start was added to specify the index + number from which the library computes the L1 norm of the variables. + This is useful to prevent some variables from being regularized by the + OWL-QN method. + - Fixed a zero-division error when the initial variables have already + been a minimizer (reported by Takashi Imamichi). In this case, the + library returns ::LBFGS_ALREADY_MINIMIZED status code. + - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants, + LBFGSFALSE and LBFGSTRUE. + - Fixed a compile error in an implicit down-cast. +- Version 1.4 (2008-04-25): + - Configurable line search algorithms. A member variable + ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente + method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm + (::LBFGS_LINESEARCH_BACKTRACKING). + - Fixed a bug: the previous version did not compute psuedo-gradients + properly in the line search routines for OWL-QN. This bug might quit + an iteration process too early when the OWL-QN routine was activated + (0 < ::lbfgs_parameter_t::orthantwise_c). + - Configure script for POSIX environments. + - SSE/SSE2 optimizations with GCC. + - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines + transparently. It is uncessary to use these functions for libLBFGS built + without SSE/SSE2 routines; you can still use any memory allocators if + SSE/SSE2 routines are disabled in libLBFGS. +- Version 1.3 (2007-12-16): + - An API change. An argument was added to lbfgs() function to receive the + final value of the objective function. This argument can be set to + \c NULL if the final value is unnecessary. + - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi). + - Added build scripts for Microsoft Visual Studio 2005 and GCC. + - Added README file. +- Version 1.2 (2007-12-13): + - Fixed a serious bug in orthant-wise L-BFGS. + An important variable was used without initialization. +- Version 1.1 (2007-12-01): + - Implemented orthant-wise L-BFGS. + - Implemented lbfgs_parameter_init() function. + - Fixed several bugs. + - API documentation. +- Version 1.0 (2007-09-20): + - Initial release. + +@section api Documentation + +- @ref liblbfgs_api "libLBFGS API" + +@section sample Sample code + +@include sample.c + +@section ack Acknowledgements + +The L-BFGS algorithm is described in: + - Jorge Nocedal. + Updating Quasi-Newton Matrices with Limited Storage. + Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. + - Dong C. Liu and Jorge Nocedal. + On the limited memory BFGS method for large scale optimization. + Mathematical Programming B, Vol. 45, No. 3, pp. 503-528, 1989. + +The line search algorithms used in this implementation are described in: + - John E. Dennis and Robert B. Schnabel. + Numerical Methods for Unconstrained Optimization and Nonlinear + Equations, Englewood Cliffs, 1983. + - Jorge J. More and David J. Thuente. + Line search algorithm with guaranteed sufficient decrease. + ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, + pp. 286-307, 1994. + +This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) +method presented in: + - Galen Andrew and Jianfeng Gao. + Scalable training of L1-regularized log-linear models. + In Proceedings of the 24th International Conference on Machine + Learning (ICML 2007), pp. 33-40, 2007. + +Special thanks go to: + - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about + OWL-QN + - Takashi Imamichi for the useful enhancements of the backtracking method + - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes + +Finally I would like to thank the original author, Jorge Nocedal, who has been +distributing the effieicnt and explanatory implementation in an open source +licence. + +@section reference Reference + +- L-BFGS by Jorge Nocedal. +- Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives by Galen Andrew. +- C port (via f2c) by Taku Kudo. +- C#/C++/Delphi/VisualBasic6 port in ALGLIB. +- Computational Crystallography Toolbox includes + scitbx::lbfgs. +*/ + +#endif/*__LBFGS_H__*/ diff --git a/lib/arithmetic_ansi.h b/lib/arithmetic_ansi.h new file mode 100644 index 00000000000..fa390da37c7 --- /dev/null +++ b/lib/arithmetic_ansi.h @@ -0,0 +1,133 @@ +/* + * ANSI C implementation of vector operations. + * + * Copyright (c) 2007-2010 Naoaki Okazaki + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* $Id$ */ + +#include +#include + +#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT +#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) +#else +#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) +#endif/*LBFGS_IEEE_FLOAT*/ + +inline static void* vecalloc(size_t size) +{ + void *memblock = malloc(size); + if (memblock) { + memset(memblock, 0, size); + } + return memblock; +} + +inline static void vecfree(void *memblock) +{ + free(memblock); +} + +inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + x[i] = c; + } +} + +inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + y[i] = x[i]; + } +} + +inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + y[i] = -x[i]; + } +} + +inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + y[i] += c * x[i]; + } +} + +inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + z[i] = x[i] - y[i]; + } +} + +inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + y[i] *= c; + } +} + +inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) +{ + int i; + + for (i = 0;i < n;++i) { + y[i] *= x[i]; + } +} + +inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) +{ + int i; + *s = 0.; + for (i = 0;i < n;++i) { + *s += x[i] * y[i]; + } +} + +inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) +{ + vecdot(s, x, x, n); + *s = (lbfgsfloatval_t)sqrt(*s); +} + +inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) +{ + vec2norm(s, x, n); + *s = (lbfgsfloatval_t)(1.0 / *s); +} diff --git a/lib/arithmetic_sse_double.h b/lib/arithmetic_sse_double.h new file mode 100644 index 00000000000..83405eeb1f1 --- /dev/null +++ b/lib/arithmetic_sse_double.h @@ -0,0 +1,294 @@ +/* + * SSE2 implementation of vector oprations (64bit double). + * + * Copyright (c) 2007-2010 Naoaki Okazaki + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* $Id$ */ + +#include +#ifndef __APPLE__ +#include +#endif +#include + +#if 1400 <= _MSC_VER +#include +#endif/*1400 <= _MSC_VER*/ + +#if HAVE_EMMINTRIN_H +#include +#endif/*HAVE_EMMINTRIN_H*/ + +inline static void* vecalloc(size_t size) +{ +#if defined(_MSC_VER) + void *memblock = _aligned_malloc(size, 16); +#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ + void *memblock = malloc(size); +#else + void *memblock = NULL, *p = NULL; + if (posix_memalign(&p, 16, size) == 0) { + memblock = p; + } +#endif + if (memblock != NULL) { + memset(memblock, 0, size); + } + return memblock; +} + +inline static void vecfree(void *memblock) +{ +#ifdef _MSC_VER + _aligned_free(memblock); +#else + free(memblock); +#endif +} + +#define fsigndiff(x, y) \ + ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002) + +#define vecset(x, c, n) \ +{ \ + int i; \ + __m128d XMM0 = _mm_set1_pd(c); \ + for (i = 0;i < (n);i += 8) { \ + _mm_store_pd((x)+i , XMM0); \ + _mm_store_pd((x)+i+2, XMM0); \ + _mm_store_pd((x)+i+4, XMM0); \ + _mm_store_pd((x)+i+6, XMM0); \ + } \ +} + +#define veccpy(y, x, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 8) { \ + __m128d XMM0 = _mm_load_pd((x)+i ); \ + __m128d XMM1 = _mm_load_pd((x)+i+2); \ + __m128d XMM2 = _mm_load_pd((x)+i+4); \ + __m128d XMM3 = _mm_load_pd((x)+i+6); \ + _mm_store_pd((y)+i , XMM0); \ + _mm_store_pd((y)+i+2, XMM1); \ + _mm_store_pd((y)+i+4, XMM2); \ + _mm_store_pd((y)+i+6, XMM3); \ + } \ +} + +#define vecncpy(y, x, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 8) { \ + __m128d XMM0 = _mm_setzero_pd(); \ + __m128d XMM1 = _mm_setzero_pd(); \ + __m128d XMM2 = _mm_setzero_pd(); \ + __m128d XMM3 = _mm_setzero_pd(); \ + __m128d XMM4 = _mm_load_pd((x)+i ); \ + __m128d XMM5 = _mm_load_pd((x)+i+2); \ + __m128d XMM6 = _mm_load_pd((x)+i+4); \ + __m128d XMM7 = _mm_load_pd((x)+i+6); \ + XMM0 = _mm_sub_pd(XMM0, XMM4); \ + XMM1 = _mm_sub_pd(XMM1, XMM5); \ + XMM2 = _mm_sub_pd(XMM2, XMM6); \ + XMM3 = _mm_sub_pd(XMM3, XMM7); \ + _mm_store_pd((y)+i , XMM0); \ + _mm_store_pd((y)+i+2, XMM1); \ + _mm_store_pd((y)+i+4, XMM2); \ + _mm_store_pd((y)+i+6, XMM3); \ + } \ +} + +#define vecadd(y, x, c, n) \ +{ \ + int i; \ + __m128d XMM7 = _mm_set1_pd(c); \ + for (i = 0;i < (n);i += 4) { \ + __m128d XMM0 = _mm_load_pd((x)+i ); \ + __m128d XMM1 = _mm_load_pd((x)+i+2); \ + __m128d XMM2 = _mm_load_pd((y)+i ); \ + __m128d XMM3 = _mm_load_pd((y)+i+2); \ + XMM0 = _mm_mul_pd(XMM0, XMM7); \ + XMM1 = _mm_mul_pd(XMM1, XMM7); \ + XMM2 = _mm_add_pd(XMM2, XMM0); \ + XMM3 = _mm_add_pd(XMM3, XMM1); \ + _mm_store_pd((y)+i , XMM2); \ + _mm_store_pd((y)+i+2, XMM3); \ + } \ +} + +#define vecdiff(z, x, y, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 8) { \ + __m128d XMM0 = _mm_load_pd((x)+i ); \ + __m128d XMM1 = _mm_load_pd((x)+i+2); \ + __m128d XMM2 = _mm_load_pd((x)+i+4); \ + __m128d XMM3 = _mm_load_pd((x)+i+6); \ + __m128d XMM4 = _mm_load_pd((y)+i ); \ + __m128d XMM5 = _mm_load_pd((y)+i+2); \ + __m128d XMM6 = _mm_load_pd((y)+i+4); \ + __m128d XMM7 = _mm_load_pd((y)+i+6); \ + XMM0 = _mm_sub_pd(XMM0, XMM4); \ + XMM1 = _mm_sub_pd(XMM1, XMM5); \ + XMM2 = _mm_sub_pd(XMM2, XMM6); \ + XMM3 = _mm_sub_pd(XMM3, XMM7); \ + _mm_store_pd((z)+i , XMM0); \ + _mm_store_pd((z)+i+2, XMM1); \ + _mm_store_pd((z)+i+4, XMM2); \ + _mm_store_pd((z)+i+6, XMM3); \ + } \ +} + +#define vecscale(y, c, n) \ +{ \ + int i; \ + __m128d XMM7 = _mm_set1_pd(c); \ + for (i = 0;i < (n);i += 4) { \ + __m128d XMM0 = _mm_load_pd((y)+i ); \ + __m128d XMM1 = _mm_load_pd((y)+i+2); \ + XMM0 = _mm_mul_pd(XMM0, XMM7); \ + XMM1 = _mm_mul_pd(XMM1, XMM7); \ + _mm_store_pd((y)+i , XMM0); \ + _mm_store_pd((y)+i+2, XMM1); \ + } \ +} + +#define vecmul(y, x, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 8) { \ + __m128d XMM0 = _mm_load_pd((x)+i ); \ + __m128d XMM1 = _mm_load_pd((x)+i+2); \ + __m128d XMM2 = _mm_load_pd((x)+i+4); \ + __m128d XMM3 = _mm_load_pd((x)+i+6); \ + __m128d XMM4 = _mm_load_pd((y)+i ); \ + __m128d XMM5 = _mm_load_pd((y)+i+2); \ + __m128d XMM6 = _mm_load_pd((y)+i+4); \ + __m128d XMM7 = _mm_load_pd((y)+i+6); \ + XMM4 = _mm_mul_pd(XMM4, XMM0); \ + XMM5 = _mm_mul_pd(XMM5, XMM1); \ + XMM6 = _mm_mul_pd(XMM6, XMM2); \ + XMM7 = _mm_mul_pd(XMM7, XMM3); \ + _mm_store_pd((y)+i , XMM4); \ + _mm_store_pd((y)+i+2, XMM5); \ + _mm_store_pd((y)+i+4, XMM6); \ + _mm_store_pd((y)+i+6, XMM7); \ + } \ +} + + + +#if 3 <= __SSE__ || defined(__SSE3__) +/* + Horizontal add with haddps SSE3 instruction. The work register (rw) + is unused. + */ +#define __horizontal_sum(r, rw) \ + r = _mm_hadd_ps(r, r); \ + r = _mm_hadd_ps(r, r); + +#else +/* + Horizontal add with SSE instruction. The work register (rw) is used. + */ +#define __horizontal_sum(r, rw) \ + rw = r; \ + r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ + r = _mm_add_ps(r, rw); \ + rw = r; \ + r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ + r = _mm_add_ps(r, rw); + +#endif + +#define vecdot(s, x, y, n) \ +{ \ + int i; \ + __m128d XMM0 = _mm_setzero_pd(); \ + __m128d XMM1 = _mm_setzero_pd(); \ + __m128d XMM2, XMM3, XMM4, XMM5; \ + for (i = 0;i < (n);i += 4) { \ + XMM2 = _mm_load_pd((x)+i ); \ + XMM3 = _mm_load_pd((x)+i+2); \ + XMM4 = _mm_load_pd((y)+i ); \ + XMM5 = _mm_load_pd((y)+i+2); \ + XMM2 = _mm_mul_pd(XMM2, XMM4); \ + XMM3 = _mm_mul_pd(XMM3, XMM5); \ + XMM0 = _mm_add_pd(XMM0, XMM2); \ + XMM1 = _mm_add_pd(XMM1, XMM3); \ + } \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + _mm_store_sd((s), XMM0); \ +} + +#define vec2norm(s, x, n) \ +{ \ + int i; \ + __m128d XMM0 = _mm_setzero_pd(); \ + __m128d XMM1 = _mm_setzero_pd(); \ + __m128d XMM2, XMM3, XMM4, XMM5; \ + for (i = 0;i < (n);i += 4) { \ + XMM2 = _mm_load_pd((x)+i ); \ + XMM3 = _mm_load_pd((x)+i+2); \ + XMM4 = XMM2; \ + XMM5 = XMM3; \ + XMM2 = _mm_mul_pd(XMM2, XMM4); \ + XMM3 = _mm_mul_pd(XMM3, XMM5); \ + XMM0 = _mm_add_pd(XMM0, XMM2); \ + XMM1 = _mm_add_pd(XMM1, XMM3); \ + } \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + XMM0 = _mm_sqrt_pd(XMM0); \ + _mm_store_sd((s), XMM0); \ +} + + +#define vec2norminv(s, x, n) \ +{ \ + int i; \ + __m128d XMM0 = _mm_setzero_pd(); \ + __m128d XMM1 = _mm_setzero_pd(); \ + __m128d XMM2, XMM3, XMM4, XMM5; \ + for (i = 0;i < (n);i += 4) { \ + XMM2 = _mm_load_pd((x)+i ); \ + XMM3 = _mm_load_pd((x)+i+2); \ + XMM4 = XMM2; \ + XMM5 = XMM3; \ + XMM2 = _mm_mul_pd(XMM2, XMM4); \ + XMM3 = _mm_mul_pd(XMM3, XMM5); \ + XMM0 = _mm_add_pd(XMM0, XMM2); \ + XMM1 = _mm_add_pd(XMM1, XMM3); \ + } \ + XMM2 = _mm_set1_pd(1.0); \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ + XMM0 = _mm_add_pd(XMM0, XMM1); \ + XMM0 = _mm_sqrt_pd(XMM0); \ + XMM2 = _mm_div_pd(XMM2, XMM0); \ + _mm_store_sd((s), XMM2); \ +} diff --git a/lib/arithmetic_sse_float.h b/lib/arithmetic_sse_float.h new file mode 100644 index 00000000000..835b8aa8a19 --- /dev/null +++ b/lib/arithmetic_sse_float.h @@ -0,0 +1,298 @@ +/* + * SSE/SSE3 implementation of vector oprations (32bit float). + * + * Copyright (c) 2007-2010 Naoaki Okazaki + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* $Id$ */ + +#include +#ifndef __APPLE__ +#include +#endif +#include + +#if 1400 <= _MSC_VER +#include +#endif/*_MSC_VER*/ + +#if HAVE_XMMINTRIN_H +#include +#endif/*HAVE_XMMINTRIN_H*/ + +#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT +#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) +#else +#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) +#endif/*LBFGS_IEEE_FLOAT*/ + +inline static void* vecalloc(size_t size) +{ +#if defined(_MSC_VER) + void *memblock = _aligned_malloc(size, 16); +#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ + void *memblock = malloc(size); +#else + void *memblock = NULL, *p = NULL; + if (posix_memalign(&p, 16, size) == 0) { + memblock = p; + } +#endif + if (memblock != NULL) { + memset(memblock, 0, size); + } + return memblock; +} + +inline static void vecfree(void *memblock) +{ + _aligned_free(memblock); +} + +#define vecset(x, c, n) \ +{ \ + int i; \ + __m128 XMM0 = _mm_set_ps1(c); \ + for (i = 0;i < (n);i += 16) { \ + _mm_store_ps((x)+i , XMM0); \ + _mm_store_ps((x)+i+ 4, XMM0); \ + _mm_store_ps((x)+i+ 8, XMM0); \ + _mm_store_ps((x)+i+12, XMM0); \ + } \ +} + +#define veccpy(y, x, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 16) { \ + __m128 XMM0 = _mm_load_ps((x)+i ); \ + __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ + __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ + __m128 XMM3 = _mm_load_ps((x)+i+12); \ + _mm_store_ps((y)+i , XMM0); \ + _mm_store_ps((y)+i+ 4, XMM1); \ + _mm_store_ps((y)+i+ 8, XMM2); \ + _mm_store_ps((y)+i+12, XMM3); \ + } \ +} + +#define vecncpy(y, x, n) \ +{ \ + int i; \ + const uint32_t mask = 0x80000000; \ + __m128 XMM4 = _mm_load_ps1((float*)&mask); \ + for (i = 0;i < (n);i += 16) { \ + __m128 XMM0 = _mm_load_ps((x)+i ); \ + __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ + __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ + __m128 XMM3 = _mm_load_ps((x)+i+12); \ + XMM0 = _mm_xor_ps(XMM0, XMM4); \ + XMM1 = _mm_xor_ps(XMM1, XMM4); \ + XMM2 = _mm_xor_ps(XMM2, XMM4); \ + XMM3 = _mm_xor_ps(XMM3, XMM4); \ + _mm_store_ps((y)+i , XMM0); \ + _mm_store_ps((y)+i+ 4, XMM1); \ + _mm_store_ps((y)+i+ 8, XMM2); \ + _mm_store_ps((y)+i+12, XMM3); \ + } \ +} + +#define vecadd(y, x, c, n) \ +{ \ + int i; \ + __m128 XMM7 = _mm_set_ps1(c); \ + for (i = 0;i < (n);i += 8) { \ + __m128 XMM0 = _mm_load_ps((x)+i ); \ + __m128 XMM1 = _mm_load_ps((x)+i+4); \ + __m128 XMM2 = _mm_load_ps((y)+i ); \ + __m128 XMM3 = _mm_load_ps((y)+i+4); \ + XMM0 = _mm_mul_ps(XMM0, XMM7); \ + XMM1 = _mm_mul_ps(XMM1, XMM7); \ + XMM2 = _mm_add_ps(XMM2, XMM0); \ + XMM3 = _mm_add_ps(XMM3, XMM1); \ + _mm_store_ps((y)+i , XMM2); \ + _mm_store_ps((y)+i+4, XMM3); \ + } \ +} + +#define vecdiff(z, x, y, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 16) { \ + __m128 XMM0 = _mm_load_ps((x)+i ); \ + __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ + __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ + __m128 XMM3 = _mm_load_ps((x)+i+12); \ + __m128 XMM4 = _mm_load_ps((y)+i ); \ + __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ + __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ + __m128 XMM7 = _mm_load_ps((y)+i+12); \ + XMM0 = _mm_sub_ps(XMM0, XMM4); \ + XMM1 = _mm_sub_ps(XMM1, XMM5); \ + XMM2 = _mm_sub_ps(XMM2, XMM6); \ + XMM3 = _mm_sub_ps(XMM3, XMM7); \ + _mm_store_ps((z)+i , XMM0); \ + _mm_store_ps((z)+i+ 4, XMM1); \ + _mm_store_ps((z)+i+ 8, XMM2); \ + _mm_store_ps((z)+i+12, XMM3); \ + } \ +} + +#define vecscale(y, c, n) \ +{ \ + int i; \ + __m128 XMM7 = _mm_set_ps1(c); \ + for (i = 0;i < (n);i += 8) { \ + __m128 XMM0 = _mm_load_ps((y)+i ); \ + __m128 XMM1 = _mm_load_ps((y)+i+4); \ + XMM0 = _mm_mul_ps(XMM0, XMM7); \ + XMM1 = _mm_mul_ps(XMM1, XMM7); \ + _mm_store_ps((y)+i , XMM0); \ + _mm_store_ps((y)+i+4, XMM1); \ + } \ +} + +#define vecmul(y, x, n) \ +{ \ + int i; \ + for (i = 0;i < (n);i += 16) { \ + __m128 XMM0 = _mm_load_ps((x)+i ); \ + __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ + __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ + __m128 XMM3 = _mm_load_ps((x)+i+12); \ + __m128 XMM4 = _mm_load_ps((y)+i ); \ + __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ + __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ + __m128 XMM7 = _mm_load_ps((y)+i+12); \ + XMM4 = _mm_mul_ps(XMM4, XMM0); \ + XMM5 = _mm_mul_ps(XMM5, XMM1); \ + XMM6 = _mm_mul_ps(XMM6, XMM2); \ + XMM7 = _mm_mul_ps(XMM7, XMM3); \ + _mm_store_ps((y)+i , XMM4); \ + _mm_store_ps((y)+i+ 4, XMM5); \ + _mm_store_ps((y)+i+ 8, XMM6); \ + _mm_store_ps((y)+i+12, XMM7); \ + } \ +} + + + +#if 3 <= __SSE__ || defined(__SSE3__) +/* + Horizontal add with haddps SSE3 instruction. The work register (rw) + is unused. + */ +#define __horizontal_sum(r, rw) \ + r = _mm_hadd_ps(r, r); \ + r = _mm_hadd_ps(r, r); + +#else +/* + Horizontal add with SSE instruction. The work register (rw) is used. + */ +#define __horizontal_sum(r, rw) \ + rw = r; \ + r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ + r = _mm_add_ps(r, rw); \ + rw = r; \ + r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ + r = _mm_add_ps(r, rw); + +#endif + +#define vecdot(s, x, y, n) \ +{ \ + int i; \ + __m128 XMM0 = _mm_setzero_ps(); \ + __m128 XMM1 = _mm_setzero_ps(); \ + __m128 XMM2, XMM3, XMM4, XMM5; \ + for (i = 0;i < (n);i += 8) { \ + XMM2 = _mm_load_ps((x)+i ); \ + XMM3 = _mm_load_ps((x)+i+4); \ + XMM4 = _mm_load_ps((y)+i ); \ + XMM5 = _mm_load_ps((y)+i+4); \ + XMM2 = _mm_mul_ps(XMM2, XMM4); \ + XMM3 = _mm_mul_ps(XMM3, XMM5); \ + XMM0 = _mm_add_ps(XMM0, XMM2); \ + XMM1 = _mm_add_ps(XMM1, XMM3); \ + } \ + XMM0 = _mm_add_ps(XMM0, XMM1); \ + __horizontal_sum(XMM0, XMM1); \ + _mm_store_ss((s), XMM0); \ +} + +#define vec2norm(s, x, n) \ +{ \ + int i; \ + __m128 XMM0 = _mm_setzero_ps(); \ + __m128 XMM1 = _mm_setzero_ps(); \ + __m128 XMM2, XMM3; \ + for (i = 0;i < (n);i += 8) { \ + XMM2 = _mm_load_ps((x)+i ); \ + XMM3 = _mm_load_ps((x)+i+4); \ + XMM2 = _mm_mul_ps(XMM2, XMM2); \ + XMM3 = _mm_mul_ps(XMM3, XMM3); \ + XMM0 = _mm_add_ps(XMM0, XMM2); \ + XMM1 = _mm_add_ps(XMM1, XMM3); \ + } \ + XMM0 = _mm_add_ps(XMM0, XMM1); \ + __horizontal_sum(XMM0, XMM1); \ + XMM2 = XMM0; \ + XMM1 = _mm_rsqrt_ss(XMM0); \ + XMM3 = XMM1; \ + XMM1 = _mm_mul_ss(XMM1, XMM1); \ + XMM1 = _mm_mul_ss(XMM1, XMM3); \ + XMM1 = _mm_mul_ss(XMM1, XMM0); \ + XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ + XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ + XMM3 = _mm_add_ss(XMM3, XMM1); \ + XMM3 = _mm_mul_ss(XMM3, XMM2); \ + _mm_store_ss((s), XMM3); \ +} + +#define vec2norminv(s, x, n) \ +{ \ + int i; \ + __m128 XMM0 = _mm_setzero_ps(); \ + __m128 XMM1 = _mm_setzero_ps(); \ + __m128 XMM2, XMM3; \ + for (i = 0;i < (n);i += 16) { \ + XMM2 = _mm_load_ps((x)+i ); \ + XMM3 = _mm_load_ps((x)+i+4); \ + XMM2 = _mm_mul_ps(XMM2, XMM2); \ + XMM3 = _mm_mul_ps(XMM3, XMM3); \ + XMM0 = _mm_add_ps(XMM0, XMM2); \ + XMM1 = _mm_add_ps(XMM1, XMM3); \ + } \ + XMM0 = _mm_add_ps(XMM0, XMM1); \ + __horizontal_sum(XMM0, XMM1); \ + XMM2 = XMM0; \ + XMM1 = _mm_rsqrt_ss(XMM0); \ + XMM3 = XMM1; \ + XMM1 = _mm_mul_ss(XMM1, XMM1); \ + XMM1 = _mm_mul_ss(XMM1, XMM3); \ + XMM1 = _mm_mul_ss(XMM1, XMM0); \ + XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ + XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ + XMM3 = _mm_add_ss(XMM3, XMM1); \ + _mm_store_ss((s), XMM3); \ +} diff --git a/lib/lbfgs.c b/lib/lbfgs.c new file mode 100644 index 00000000000..30ffcf2ab0b --- /dev/null +++ b/lib/lbfgs.c @@ -0,0 +1,1371 @@ +/* + * Limited memory BFGS (L-BFGS). + * + * Copyright (c) 1990, Jorge Nocedal + * Copyright (c) 2007-2010 Naoaki Okazaki + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* $Id$ */ + +/* +This library is a C port of the FORTRAN implementation of Limited-memory +Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. +The original FORTRAN source code is available at: +http://www.ece.northwestern.edu/~nocedal/lbfgs.html + +The L-BFGS algorithm is described in: + - Jorge Nocedal. + Updating Quasi-Newton Matrices with Limited Storage. + Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. + - Dong C. Liu and Jorge Nocedal. + On the limited memory BFGS method for large scale optimization. + Mathematical Programming B, Vol. 45, No. 3, pp. 503-528, 1989. + +The line search algorithms used in this implementation are described in: + - John E. Dennis and Robert B. Schnabel. + Numerical Methods for Unconstrained Optimization and Nonlinear + Equations, Englewood Cliffs, 1983. + - Jorge J. More and David J. Thuente. + Line search algorithm with guaranteed sufficient decrease. + ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, + pp. 286-307, 1994. + +This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) +method presented in: + - Galen Andrew and Jianfeng Gao. + Scalable training of L1-regularized log-linear models. + In Proceedings of the 24th International Conference on Machine + Learning (ICML 2007), pp. 33-40, 2007. + +I would like to thank the original author, Jorge Nocedal, who has been +distributing the effieicnt and explanatory implementation in an open source +licence. +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif/*HAVE_CONFIG_H*/ + +#include +#include +#include +#include + +#include + +#ifdef _MSC_VER +#define inline __inline +#endif/*_MSC_VER*/ + +#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64 +/* Use SSE2 optimization for 64bit double precision. */ +#include "arithmetic_sse_double.h" + +#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32 +/* Use SSE optimization for 32bit float precision. */ +#include "arithmetic_sse_float.h" + +#else +/* No CPU specific optimization. */ +#include "arithmetic_ansi.h" + +#endif + +#define min2(a, b) ((a) <= (b) ? (a) : (b)) +#define max2(a, b) ((a) >= (b) ? (a) : (b)) +#define max3(a, b, c) max2(max2((a), (b)), (c)); + +struct tag_callback_data { + int n; + void *instance; + lbfgs_evaluate_t proc_evaluate; + lbfgs_progress_t proc_progress; +}; +typedef struct tag_callback_data callback_data_t; + +struct tag_iteration_data { + lbfgsfloatval_t alpha; + lbfgsfloatval_t *s; /* [n] */ + lbfgsfloatval_t *y; /* [n] */ + lbfgsfloatval_t ys; /* vecdot(y, s) */ +}; +typedef struct tag_iteration_data iteration_data_t; + +static const lbfgs_parameter_t _defparam = { + 6, 1e-5, 0, 1e-5, + 0, LBFGS_LINESEARCH_DEFAULT, 40, + 1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16, + 0.0, 0, -1, +}; + +/* Forward function declarations. */ + +typedef int (*line_search_proc)( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wa, + callback_data_t *cd, + const lbfgs_parameter_t *param + ); + +static int line_search_backtracking( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wa, + callback_data_t *cd, + const lbfgs_parameter_t *param + ); + +static int line_search_backtracking_owlqn( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wp, + callback_data_t *cd, + const lbfgs_parameter_t *param + ); + +static int line_search_morethuente( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wa, + callback_data_t *cd, + const lbfgs_parameter_t *param + ); + +static int update_trial_interval( + lbfgsfloatval_t *x, + lbfgsfloatval_t *fx, + lbfgsfloatval_t *dx, + lbfgsfloatval_t *y, + lbfgsfloatval_t *fy, + lbfgsfloatval_t *dy, + lbfgsfloatval_t *t, + lbfgsfloatval_t *ft, + lbfgsfloatval_t *dt, + const lbfgsfloatval_t tmin, + const lbfgsfloatval_t tmax, + int *brackt + ); + +static lbfgsfloatval_t owlqn_x1norm( + const lbfgsfloatval_t* x, + const int start, + const int n + ); + +static void owlqn_pseudo_gradient( + lbfgsfloatval_t* pg, + const lbfgsfloatval_t* x, + const lbfgsfloatval_t* g, + const int n, + const lbfgsfloatval_t c, + const int start, + const int end + ); + +static void owlqn_project( + lbfgsfloatval_t* d, + const lbfgsfloatval_t* sign, + const int start, + const int end + ); + + +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) +static int round_out_variables(int n) +{ + n += 7; + n /= 8; + n *= 8; + return n; +} +#endif/*defined(USE_SSE)*/ + +lbfgsfloatval_t* lbfgs_malloc(int n) +{ +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) + n = round_out_variables(n); +#endif/*defined(USE_SSE)*/ + return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n); +} + +void lbfgs_free(lbfgsfloatval_t *x) +{ + vecfree(x); +} + +void lbfgs_parameter_init(lbfgs_parameter_t *param) +{ + memcpy(param, &_defparam, sizeof(*param)); +} + +int lbfgs( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *ptr_fx, + lbfgs_evaluate_t proc_evaluate, + lbfgs_progress_t proc_progress, + void *instance, + lbfgs_parameter_t *_param + ) +{ + int ret; + int i, j, k, ls, end, bound; + lbfgsfloatval_t step; + + /* Constant parameters and their default values. */ + lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; + const int m = param.m; + + lbfgsfloatval_t *xp = NULL; + lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; + lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; + iteration_data_t *lm = NULL, *it = NULL; + lbfgsfloatval_t ys, yy; + lbfgsfloatval_t xnorm, gnorm, beta; + lbfgsfloatval_t fx = 0.; + lbfgsfloatval_t rate = 0.; + line_search_proc linesearch = line_search_morethuente; + + /* Construct a callback data. */ + callback_data_t cd; + cd.n = n; + cd.instance = instance; + cd.proc_evaluate = proc_evaluate; + cd.proc_progress = proc_progress; + +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) + /* Round out the number of variables. */ + n = round_out_variables(n); +#endif/*defined(USE_SSE)*/ + + /* Check the input parameters for errors. */ + if (n <= 0) { + return LBFGSERR_INVALID_N; + } +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) + if (n % 8 != 0) { + return LBFGSERR_INVALID_N_SSE; + } + if ((uintptr_t)(const void*)x % 16 != 0) { + return LBFGSERR_INVALID_X_SSE; + } +#endif/*defined(USE_SSE)*/ + if (param.epsilon < 0.) { + return LBFGSERR_INVALID_EPSILON; + } + if (param.past < 0) { + return LBFGSERR_INVALID_TESTPERIOD; + } + if (param.delta < 0.) { + return LBFGSERR_INVALID_DELTA; + } + if (param.min_step < 0.) { + return LBFGSERR_INVALID_MINSTEP; + } + if (param.max_step < param.min_step) { + return LBFGSERR_INVALID_MAXSTEP; + } + if (param.ftol < 0.) { + return LBFGSERR_INVALID_FTOL; + } + if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || + param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { + if (param.wolfe <= param.ftol || 1. <= param.wolfe) { + return LBFGSERR_INVALID_WOLFE; + } + } + if (param.gtol < 0.) { + return LBFGSERR_INVALID_GTOL; + } + if (param.xtol < 0.) { + return LBFGSERR_INVALID_XTOL; + } + if (param.max_linesearch <= 0) { + return LBFGSERR_INVALID_MAXLINESEARCH; + } + if (param.orthantwise_c < 0.) { + return LBFGSERR_INVALID_ORTHANTWISE; + } + if (param.orthantwise_start < 0 || n < param.orthantwise_start) { + return LBFGSERR_INVALID_ORTHANTWISE_START; + } + if (param.orthantwise_end < 0) { + param.orthantwise_end = n; + } + if (n < param.orthantwise_end) { + return LBFGSERR_INVALID_ORTHANTWISE_END; + } + if (param.orthantwise_c != 0.) { + switch (param.linesearch) { + case LBFGS_LINESEARCH_BACKTRACKING: + linesearch = line_search_backtracking_owlqn; + break; + default: + /* Only the backtracking method is available. */ + return LBFGSERR_INVALID_LINESEARCH; + } + } else { + switch (param.linesearch) { + case LBFGS_LINESEARCH_MORETHUENTE: + linesearch = line_search_morethuente; + break; + case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: + case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: + case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: + linesearch = line_search_backtracking; + break; + default: + return LBFGSERR_INVALID_LINESEARCH; + } + } + + /* Allocate working space. */ + xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { + ret = LBFGSERR_OUTOFMEMORY; + goto lbfgs_exit; + } + + if (param.orthantwise_c != 0.) { + /* Allocate working space for OW-LQN. */ + pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + if (pg == NULL) { + ret = LBFGSERR_OUTOFMEMORY; + goto lbfgs_exit; + } + } + + /* Allocate limited memory storage. */ + lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t)); + if (lm == NULL) { + ret = LBFGSERR_OUTOFMEMORY; + goto lbfgs_exit; + } + + /* Initialize the limited memory. */ + for (i = 0;i < m;++i) { + it = &lm[i]; + it->alpha = 0; + it->ys = 0; + it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + if (it->s == NULL || it->y == NULL) { + ret = LBFGSERR_OUTOFMEMORY; + goto lbfgs_exit; + } + } + + /* Allocate an array for storing previous values of the objective function. */ + if (0 < param.past) { + pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t)); + } + + /* Evaluate the function value and its gradient. */ + fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); + if (0. != param.orthantwise_c) { + /* Compute the L1 norm of the variable and add it to the object value. */ + xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); + fx += xnorm * param.orthantwise_c; + owlqn_pseudo_gradient( + pg, x, g, n, + param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + ); + } + + /* Store the initial value of the objective function. */ + if (pf != NULL) { + pf[0] = fx; + } + + /* + Compute the direction; + we assume the initial hessian matrix H_0 as the identity matrix. + */ + if (param.orthantwise_c == 0.) { + vecncpy(d, g, n); + } else { + vecncpy(d, pg, n); + } + + /* + Make sure that the initial variables are not a minimizer. + */ + vec2norm(&xnorm, x, n); + if (param.orthantwise_c == 0.) { + vec2norm(&gnorm, g, n); + } else { + vec2norm(&gnorm, pg, n); + } + if (xnorm < 1.0) xnorm = 1.0; + if (gnorm / xnorm <= param.epsilon) { + ret = LBFGS_ALREADY_MINIMIZED; + goto lbfgs_exit; + } + + /* Compute the initial step: + step = 1.0 / sqrt(vecdot(d, d, n)) + */ + vec2norminv(&step, d, n); + + k = 1; + end = 0; + for (;;) { + /* Store the current position and gradient vectors. */ + veccpy(xp, x, n); + veccpy(gp, g, n); + + /* Search for an optimal step. */ + if (param.orthantwise_c == 0.) { + ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); + } else { + ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); + owlqn_pseudo_gradient( + pg, x, g, n, + param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + ); + } + if (ls < 0) { + /* Revert to the previous point. */ + veccpy(x, xp, n); + veccpy(g, gp, n); + ret = ls; + goto lbfgs_exit; + } + + /* Compute x and g norms. */ + vec2norm(&xnorm, x, n); + if (param.orthantwise_c == 0.) { + vec2norm(&gnorm, g, n); + } else { + vec2norm(&gnorm, pg, n); + } + + /* Report the progress. */ + if (cd.proc_progress) { + if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { + goto lbfgs_exit; + } + } + + /* + Convergence test. + The criterion is given by the following formula: + |g(x)| / \max(1, |x|) < \epsilon + */ + if (xnorm < 1.0) xnorm = 1.0; + if (gnorm / xnorm <= param.epsilon) { + /* Convergence. */ + ret = LBFGS_SUCCESS; + break; + } + + /* + Test for stopping criterion. + The criterion is given by the following formula: + (f(past_x) - f(x)) / f(x) < \delta + */ + if (pf != NULL) { + /* We don't test the stopping criterion while k < past. */ + if (param.past <= k) { + /* Compute the relative improvement from the past. */ + rate = (pf[k % param.past] - fx) / fx; + + /* The stopping criterion. */ + if (rate < param.delta) { + ret = LBFGS_STOP; + break; + } + } + + /* Store the current value of the objective function. */ + pf[k % param.past] = fx; + } + + if (param.max_iterations != 0 && param.max_iterations < k+1) { + /* Maximum number of iterations. */ + ret = LBFGSERR_MAXIMUMITERATION; + break; + } + + /* + Update vectors s and y: + s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. + y_{k+1} = g_{k+1} - g_{k}. + */ + it = &lm[end]; + vecdiff(it->s, x, xp, n); + vecdiff(it->y, g, gp, n); + + /* + Compute scalars ys and yy: + ys = y^t \cdot s = 1 / \rho. + yy = y^t \cdot y. + Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). + */ + vecdot(&ys, it->y, it->s, n); + vecdot(&yy, it->y, it->y, n); + it->ys = ys; + + /* + Recursive formula to compute dir = -(H \cdot g). + This is described in page 779 of: + Jorge Nocedal. + Updating Quasi-Newton Matrices with Limited Storage. + Mathematics of Computation, Vol. 35, No. 151, + pp. 773--782, 1980. + */ + bound = (m <= k) ? m : k; + ++k; + end = (end + 1) % m; + + /* Compute the steepest direction. */ + if (param.orthantwise_c == 0.) { + /* Compute the negative of gradients. */ + vecncpy(d, g, n); + } else { + vecncpy(d, pg, n); + } + + j = end; + for (i = 0;i < bound;++i) { + j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ + it = &lm[j]; + /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ + vecdot(&it->alpha, it->s, d, n); + it->alpha /= it->ys; + /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ + vecadd(d, it->y, -it->alpha, n); + } + + vecscale(d, ys / yy, n); + + for (i = 0;i < bound;++i) { + it = &lm[j]; + /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ + vecdot(&beta, it->y, d, n); + beta /= it->ys; + /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ + vecadd(d, it->s, it->alpha - beta, n); + j = (j + 1) % m; /* if (++j == m) j = 0; */ + } + + /* + Constrain the search direction for orthant-wise updates. + */ + if (param.orthantwise_c != 0.) { + for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { + if (d[i] * pg[i] >= 0) { + d[i] = 0; + } + } + } + + /* + Now the search direction d is ready. We try step = 1 first. + */ + step = 1.0; + } + +lbfgs_exit: + /* Return the final value of the objective function. */ + if (ptr_fx != NULL) { + *ptr_fx = fx; + } + + vecfree(pf); + + /* Free memory blocks used by this function. */ + if (lm != NULL) { + for (i = 0;i < m;++i) { + vecfree(lm[i].s); + vecfree(lm[i].y); + } + vecfree(lm); + } + vecfree(pg); + vecfree(w); + vecfree(d); + vecfree(gp); + vecfree(g); + vecfree(xp); + + return ret; +} + + + +static int line_search_backtracking( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wp, + callback_data_t *cd, + const lbfgs_parameter_t *param + ) +{ + int count = 0; + lbfgsfloatval_t width, dg; + lbfgsfloatval_t finit, dginit = 0., dgtest; + const lbfgsfloatval_t dec = 0.5, inc = 2.1; + + /* Check the input parameters for errors. */ + if (*stp <= 0.) { + return LBFGSERR_INVALIDPARAMETERS; + } + + /* Compute the initial gradient in the search direction. */ + vecdot(&dginit, g, s, n); + + /* Make sure that s points to a descent direction. */ + if (0 < dginit) { + return LBFGSERR_INCREASEGRADIENT; + } + + /* The initial value of the objective function. */ + finit = *f; + dgtest = param->ftol * dginit; + + for (;;) { + veccpy(x, xp, n); + vecadd(x, s, *stp, n); + + /* Evaluate the function and gradient values. */ + *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); + + ++count; + + if (*f > finit + *stp * dgtest) { + width = dec; + } else { + /* The sufficient decrease condition (Armijo condition). */ + if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) { + /* Exit with the Armijo condition. */ + return count; + } + + /* Check the Wolfe condition. */ + vecdot(&dg, g, s, n); + if (dg < param->wolfe * dginit) { + width = inc; + } else { + if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) { + /* Exit with the regular Wolfe condition. */ + return count; + } + + /* Check the strong Wolfe condition. */ + if(dg > -param->wolfe * dginit) { + width = dec; + } else { + /* Exit with the strong Wolfe condition. */ + return count; + } + } + } + + if (*stp < param->min_step) { + /* The step is the minimum value. */ + return LBFGSERR_MINIMUMSTEP; + } + if (*stp > param->max_step) { + /* The step is the maximum value. */ + return LBFGSERR_MAXIMUMSTEP; + } + if (param->max_linesearch <= count) { + /* Maximum number of iteration. */ + return LBFGSERR_MAXIMUMLINESEARCH; + } + + (*stp) *= width; + } +} + + + +static int line_search_backtracking_owlqn( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wp, + callback_data_t *cd, + const lbfgs_parameter_t *param + ) +{ + int i, count = 0; + lbfgsfloatval_t width = 0.5, norm = 0.; + lbfgsfloatval_t finit = *f, dgtest; + + /* Check the input parameters for errors. */ + if (*stp <= 0.) { + return LBFGSERR_INVALIDPARAMETERS; + } + + /* Choose the orthant for the new point. */ + for (i = 0;i < n;++i) { + wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i]; + } + + for (;;) { + /* Update the current point. */ + veccpy(x, xp, n); + vecadd(x, s, *stp, n); + + /* The current point is projected onto the orthant. */ + owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end); + + /* Evaluate the function and gradient values. */ + *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); + + /* Compute the L1 norm of the variables and add it to the object value. */ + norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end); + *f += norm * param->orthantwise_c; + + ++count; + + dgtest = 0.; + for (i = 0;i < n;++i) { + dgtest += (x[i] - xp[i]) * gp[i]; + } + + if (*f <= finit + param->ftol * dgtest) { + /* The sufficient decrease condition. */ + return count; + } + + if (*stp < param->min_step) { + /* The step is the minimum value. */ + return LBFGSERR_MINIMUMSTEP; + } + if (*stp > param->max_step) { + /* The step is the maximum value. */ + return LBFGSERR_MAXIMUMSTEP; + } + if (param->max_linesearch <= count) { + /* Maximum number of iteration. */ + return LBFGSERR_MAXIMUMLINESEARCH; + } + + (*stp) *= width; + } +} + + + +static int line_search_morethuente( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *f, + lbfgsfloatval_t *g, + lbfgsfloatval_t *s, + lbfgsfloatval_t *stp, + const lbfgsfloatval_t* xp, + const lbfgsfloatval_t* gp, + lbfgsfloatval_t *wa, + callback_data_t *cd, + const lbfgs_parameter_t *param + ) +{ + int count = 0; + int brackt, stage1, uinfo = 0; + lbfgsfloatval_t dg; + lbfgsfloatval_t stx, fx, dgx; + lbfgsfloatval_t sty, fy, dgy; + lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm; + lbfgsfloatval_t finit, ftest1, dginit, dgtest; + lbfgsfloatval_t width, prev_width; + lbfgsfloatval_t stmin, stmax; + + /* Check the input parameters for errors. */ + if (*stp <= 0.) { + return LBFGSERR_INVALIDPARAMETERS; + } + + /* Compute the initial gradient in the search direction. */ + vecdot(&dginit, g, s, n); + + /* Make sure that s points to a descent direction. */ + if (0 < dginit) { + return LBFGSERR_INCREASEGRADIENT; + } + + /* Initialize local variables. */ + brackt = 0; + stage1 = 1; + finit = *f; + dgtest = param->ftol * dginit; + width = param->max_step - param->min_step; + prev_width = 2.0 * width; + + /* + The variables stx, fx, dgx contain the values of the step, + function, and directional derivative at the best step. + The variables sty, fy, dgy contain the value of the step, + function, and derivative at the other endpoint of + the interval of uncertainty. + The variables stp, f, dg contain the values of the step, + function, and derivative at the current step. + */ + stx = sty = 0.; + fx = fy = finit; + dgx = dgy = dginit; + + for (;;) { + /* + Set the minimum and maximum steps to correspond to the + present interval of uncertainty. + */ + if (brackt) { + stmin = min2(stx, sty); + stmax = max2(stx, sty); + } else { + stmin = stx; + stmax = *stp + 4.0 * (*stp - stx); + } + + /* Clip the step in the range of [stpmin, stpmax]. */ + if (*stp < param->min_step) *stp = param->min_step; + if (param->max_step < *stp) *stp = param->max_step; + + /* + If an unusual termination is to occur then let + stp be the lowest point obtained so far. + */ + if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) { + *stp = stx; + } + + /* + Compute the current value of x: + x <- x + (*stp) * s. + */ + veccpy(x, xp, n); + vecadd(x, s, *stp, n); + + /* Evaluate the function and gradient values. */ + *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); + vecdot(&dg, g, s, n); + + ftest1 = finit + *stp * dgtest; + ++count; + + /* Test for errors and convergence. */ + if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) { + /* Rounding errors prevent further progress. */ + return LBFGSERR_ROUNDING_ERROR; + } + if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) { + /* The step is the maximum value. */ + return LBFGSERR_MAXIMUMSTEP; + } + if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) { + /* The step is the minimum value. */ + return LBFGSERR_MINIMUMSTEP; + } + if (brackt && (stmax - stmin) <= param->xtol * stmax) { + /* Relative width of the interval of uncertainty is at most xtol. */ + return LBFGSERR_WIDTHTOOSMALL; + } + if (param->max_linesearch <= count) { + /* Maximum number of iteration. */ + return LBFGSERR_MAXIMUMLINESEARCH; + } + if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) { + /* The sufficient decrease condition and the directional derivative condition hold. */ + return count; + } + + /* + In the first stage we seek a step for which the modified + function has a nonpositive value and nonnegative derivative. + */ + if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) { + stage1 = 0; + } + + /* + A modified function is used to predict the step only if + we have not obtained a step for which the modified + function has a nonpositive function value and nonnegative + derivative, and if a lower function value has been + obtained but the decrease is not sufficient. + */ + if (stage1 && ftest1 < *f && *f <= fx) { + /* Define the modified function and derivative values. */ + fm = *f - *stp * dgtest; + fxm = fx - stx * dgtest; + fym = fy - sty * dgtest; + dgm = dg - dgtest; + dgxm = dgx - dgtest; + dgym = dgy - dgtest; + + /* + Call update_trial_interval() to update the interval of + uncertainty and to compute the new step. + */ + uinfo = update_trial_interval( + &stx, &fxm, &dgxm, + &sty, &fym, &dgym, + stp, &fm, &dgm, + stmin, stmax, &brackt + ); + + /* Reset the function and gradient values for f. */ + fx = fxm + stx * dgtest; + fy = fym + sty * dgtest; + dgx = dgxm + dgtest; + dgy = dgym + dgtest; + } else { + /* + Call update_trial_interval() to update the interval of + uncertainty and to compute the new step. + */ + uinfo = update_trial_interval( + &stx, &fx, &dgx, + &sty, &fy, &dgy, + stp, f, &dg, + stmin, stmax, &brackt + ); + } + + /* + Force a sufficient decrease in the interval of uncertainty. + */ + if (brackt) { + if (0.66 * prev_width <= fabs(sty - stx)) { + *stp = stx + 0.5 * (sty - stx); + } + prev_width = width; + width = fabs(sty - stx); + } + } + + return LBFGSERR_LOGICERROR; +} + + + +/** + * Define the local variables for computing minimizers. + */ +#define USES_MINIMIZER \ + lbfgsfloatval_t a, d, gamma, theta, p, q, r, s; + +/** + * Find a minimizer of an interpolated cubic function. + * @param cm The minimizer of the interpolated cubic. + * @param u The value of one point, u. + * @param fu The value of f(u). + * @param du The value of f'(u). + * @param v The value of another point, v. + * @param fv The value of f(v). + * @param du The value of f'(v). + */ +#define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \ + d = (v) - (u); \ + theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ + p = fabs(theta); \ + q = fabs(du); \ + r = fabs(dv); \ + s = max3(p, q, r); \ + /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ + a = theta / s; \ + gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \ + if ((v) < (u)) gamma = -gamma; \ + p = gamma - (du) + theta; \ + q = gamma - (du) + gamma + (dv); \ + r = p / q; \ + (cm) = (u) + r * d; + +/** + * Find a minimizer of an interpolated cubic function. + * @param cm The minimizer of the interpolated cubic. + * @param u The value of one point, u. + * @param fu The value of f(u). + * @param du The value of f'(u). + * @param v The value of another point, v. + * @param fv The value of f(v). + * @param du The value of f'(v). + * @param xmin The maximum value. + * @param xmin The minimum value. + */ +#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \ + d = (v) - (u); \ + theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ + p = fabs(theta); \ + q = fabs(du); \ + r = fabs(dv); \ + s = max3(p, q, r); \ + /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ + a = theta / s; \ + gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \ + if ((u) < (v)) gamma = -gamma; \ + p = gamma - (dv) + theta; \ + q = gamma - (dv) + gamma + (du); \ + r = p / q; \ + if (r < 0. && gamma != 0.) { \ + (cm) = (v) - r * d; \ + } else if (a < 0) { \ + (cm) = (xmax); \ + } else { \ + (cm) = (xmin); \ + } + +/** + * Find a minimizer of an interpolated quadratic function. + * @param qm The minimizer of the interpolated quadratic. + * @param u The value of one point, u. + * @param fu The value of f(u). + * @param du The value of f'(u). + * @param v The value of another point, v. + * @param fv The value of f(v). + */ +#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \ + a = (v) - (u); \ + (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a; + +/** + * Find a minimizer of an interpolated quadratic function. + * @param qm The minimizer of the interpolated quadratic. + * @param u The value of one point, u. + * @param du The value of f'(u). + * @param v The value of another point, v. + * @param dv The value of f'(v). + */ +#define QUARD_MINIMIZER2(qm, u, du, v, dv) \ + a = (u) - (v); \ + (qm) = (v) + (dv) / ((dv) - (du)) * a; + +/** + * Update a safeguarded trial value and interval for line search. + * + * The parameter x represents the step with the least function value. + * The parameter t represents the current step. This function assumes + * that the derivative at the point of x in the direction of the step. + * If the bracket is set to true, the minimizer has been bracketed in + * an interval of uncertainty with endpoints between x and y. + * + * @param x The pointer to the value of one endpoint. + * @param fx The pointer to the value of f(x). + * @param dx The pointer to the value of f'(x). + * @param y The pointer to the value of another endpoint. + * @param fy The pointer to the value of f(y). + * @param dy The pointer to the value of f'(y). + * @param t The pointer to the value of the trial value, t. + * @param ft The pointer to the value of f(t). + * @param dt The pointer to the value of f'(t). + * @param tmin The minimum value for the trial value, t. + * @param tmax The maximum value for the trial value, t. + * @param brackt The pointer to the predicate if the trial value is + * bracketed. + * @retval int Status value. Zero indicates a normal termination. + * + * @see + * Jorge J. More and David J. Thuente. Line search algorithm with + * guaranteed sufficient decrease. ACM Transactions on Mathematical + * Software (TOMS), Vol 20, No 3, pp. 286-307, 1994. + */ +static int update_trial_interval( + lbfgsfloatval_t *x, + lbfgsfloatval_t *fx, + lbfgsfloatval_t *dx, + lbfgsfloatval_t *y, + lbfgsfloatval_t *fy, + lbfgsfloatval_t *dy, + lbfgsfloatval_t *t, + lbfgsfloatval_t *ft, + lbfgsfloatval_t *dt, + const lbfgsfloatval_t tmin, + const lbfgsfloatval_t tmax, + int *brackt + ) +{ + int bound; + int dsign = fsigndiff(dt, dx); + lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */ + lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */ + lbfgsfloatval_t newt; /* new trial value. */ + USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */ + + /* Check the input parameters for errors. */ + if (*brackt) { + if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) { + /* The trival value t is out of the interval. */ + return LBFGSERR_OUTOFINTERVAL; + } + if (0. <= *dx * (*t - *x)) { + /* The function must decrease from x. */ + return LBFGSERR_INCREASEGRADIENT; + } + if (tmax < tmin) { + /* Incorrect tmin and tmax specified. */ + return LBFGSERR_INCORRECT_TMINMAX; + } + } + + /* + Trial value selection. + */ + if (*fx < *ft) { + /* + Case 1: a higher function value. + The minimum is brackt. If the cubic minimizer is closer + to x than the quadratic one, the cubic one is taken, else + the average of the minimizers is taken. + */ + *brackt = 1; + bound = 1; + CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); + QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft); + if (fabs(mc - *x) < fabs(mq - *x)) { + newt = mc; + } else { + newt = mc + 0.5 * (mq - mc); + } + } else if (dsign) { + /* + Case 2: a lower function value and derivatives of + opposite sign. The minimum is brackt. If the cubic + minimizer is closer to x than the quadratic (secant) one, + the cubic one is taken, else the quadratic one is taken. + */ + *brackt = 1; + bound = 0; + CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); + QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); + if (fabs(mc - *t) > fabs(mq - *t)) { + newt = mc; + } else { + newt = mq; + } + } else if (fabs(*dt) < fabs(*dx)) { + /* + Case 3: a lower function value, derivatives of the + same sign, and the magnitude of the derivative decreases. + The cubic minimizer is only used if the cubic tends to + infinity in the direction of the minimizer or if the minimum + of the cubic is beyond t. Otherwise the cubic minimizer is + defined to be either tmin or tmax. The quadratic (secant) + minimizer is also computed and if the minimum is brackt + then the the minimizer closest to x is taken, else the one + farthest away is taken. + */ + bound = 1; + CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax); + QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); + if (*brackt) { + if (fabs(*t - mc) < fabs(*t - mq)) { + newt = mc; + } else { + newt = mq; + } + } else { + if (fabs(*t - mc) > fabs(*t - mq)) { + newt = mc; + } else { + newt = mq; + } + } + } else { + /* + Case 4: a lower function value, derivatives of the + same sign, and the magnitude of the derivative does + not decrease. If the minimum is not brackt, the step + is either tmin or tmax, else the cubic minimizer is taken. + */ + bound = 0; + if (*brackt) { + CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy); + } else if (*x < *t) { + newt = tmax; + } else { + newt = tmin; + } + } + + /* + Update the interval of uncertainty. This update does not + depend on the new step or the case analysis above. + + - Case a: if f(x) < f(t), + x <- x, y <- t. + - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, + x <- t, y <- y. + - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, + x <- t, y <- x. + */ + if (*fx < *ft) { + /* Case a */ + *y = *t; + *fy = *ft; + *dy = *dt; + } else { + /* Case c */ + if (dsign) { + *y = *x; + *fy = *fx; + *dy = *dx; + } + /* Cases b and c */ + *x = *t; + *fx = *ft; + *dx = *dt; + } + + /* Clip the new trial value in [tmin, tmax]. */ + if (tmax < newt) newt = tmax; + if (newt < tmin) newt = tmin; + + /* + Redefine the new trial value if it is close to the upper bound + of the interval. + */ + if (*brackt && bound) { + mq = *x + 0.66 * (*y - *x); + if (*x < *y) { + if (mq < newt) newt = mq; + } else { + if (newt < mq) newt = mq; + } + } + + /* Return the new trial value. */ + *t = newt; + return 0; +} + + + + + +static lbfgsfloatval_t owlqn_x1norm( + const lbfgsfloatval_t* x, + const int start, + const int n + ) +{ + int i; + lbfgsfloatval_t norm = 0.; + + for (i = start;i < n;++i) { + norm += fabs(x[i]); + } + + return norm; +} + +static void owlqn_pseudo_gradient( + lbfgsfloatval_t* pg, + const lbfgsfloatval_t* x, + const lbfgsfloatval_t* g, + const int n, + const lbfgsfloatval_t c, + const int start, + const int end + ) +{ + int i; + + /* Compute the negative of gradients. */ + for (i = 0;i < start;++i) { + pg[i] = g[i]; + } + + /* Compute the psuedo-gradients. */ + for (i = start;i < end;++i) { + if (x[i] < 0.) { + /* Differentiable. */ + pg[i] = g[i] - c; + } else if (0. < x[i]) { + /* Differentiable. */ + pg[i] = g[i] + c; + } else { + if (g[i] < -c) { + /* Take the right partial derivative. */ + pg[i] = g[i] + c; + } else if (c < g[i]) { + /* Take the left partial derivative. */ + pg[i] = g[i] - c; + } else { + pg[i] = 0.; + } + } + } + + for (i = end;i < n;++i) { + pg[i] = g[i]; + } +} + +static void owlqn_project( + lbfgsfloatval_t* d, + const lbfgsfloatval_t* sign, + const int start, + const int end + ) +{ + int i; + + for (i = start;i < end;++i) { + if (d[i] * sign[i] <= 0) { + d[i] = 0; + } + } +} From 2f2dcf574f7b58a11d17f0e466834d3179dd998d Mon Sep 17 00:00:00 2001 From: Brad King Date: Fri, 24 Jan 2025 14:59:50 -0500 Subject: [PATCH 4/4] ENH: Integrate libLBFGS into our CMake build system Fixes: #3135 --- .../include/itkLBFGS2Optimizerv4.h | 2 +- Modules/ThirdParty/libLBFGS/CMakeLists.txt | 7 ++++- .../ThirdParty/libLBFGS/src/CMakeLists.txt | 20 +------------- Modules/ThirdParty/libLBFGS/src/itk_lbfgs.h | 27 +++++++++++++++++++ .../libLBFGS/src/itklbfgs/CMakeLists.txt | 20 ++++++++++++++ 5 files changed, 55 insertions(+), 21 deletions(-) create mode 100644 Modules/ThirdParty/libLBFGS/src/itk_lbfgs.h create mode 100644 Modules/ThirdParty/libLBFGS/src/itklbfgs/CMakeLists.txt diff --git a/Modules/Numerics/Optimizersv4/include/itkLBFGS2Optimizerv4.h b/Modules/Numerics/Optimizersv4/include/itkLBFGS2Optimizerv4.h index 0d6f509f188..0fff5ef4aea 100644 --- a/Modules/Numerics/Optimizersv4/include/itkLBFGS2Optimizerv4.h +++ b/Modules/Numerics/Optimizersv4/include/itkLBFGS2Optimizerv4.h @@ -22,7 +22,7 @@ #include "ITKOptimizersv4Export.h" #include -#include "lbfgs.h" +#include "itk_lbfgs.h" namespace itk { diff --git a/Modules/ThirdParty/libLBFGS/CMakeLists.txt b/Modules/ThirdParty/libLBFGS/CMakeLists.txt index 4014a511fbb..1d4179167e7 100644 --- a/Modules/ThirdParty/libLBFGS/CMakeLists.txt +++ b/Modules/ThirdParty/libLBFGS/CMakeLists.txt @@ -13,9 +13,14 @@ set(ITKLIBLBFGS_THIRD_PARTY 1) # set(ITKLIBLBFGS_NO_SRC 1) #else() -set(ITKLIBLBFGS_INCLUDE_DIRS ${ITKLIBLBFGS_SOURCE_DIR}/include ) +set(ITKLIBLBFGS_INCLUDE_DIRS ${ITKLIBLBFGS_SOURCE_DIR}/src) set(ITKLIBLBFGS_LIBRARIES itklbfgs) #endif() itk_module_impl() + +install(FILES ${ITKLIBLBFGS_SOURCE_DIR}/src/itk_lbfgs.h + DESTINATION ${ITKLIBLBFGS_INSTALL_INCLUDE_DIR} + COMPONENT Development + ) diff --git a/Modules/ThirdParty/libLBFGS/src/CMakeLists.txt b/Modules/ThirdParty/libLBFGS/src/CMakeLists.txt index 7babdc9462e..390dbcad0a0 100644 --- a/Modules/ThirdParty/libLBFGS/src/CMakeLists.txt +++ b/Modules/ThirdParty/libLBFGS/src/CMakeLists.txt @@ -1,25 +1,7 @@ -ADD_LIBRARY( itklbfgs - lbfgs.c -) - -IF(ITK_LIBRARY_PROPERTIES) - SET_TARGET_PROPERTIES(itklbfgs PROPERTIES ${ITK_LIBRARY_PROPERTIES}) -ENDIF(ITK_LIBRARY_PROPERTIES) - set(ITK3P_INSTALL_EXPORT_NAME "${ITKLIBLBFGS-targets}") set(ITK3P_INSTALL_INCLUDE_DIR "${ITKLIBLBFGS_INSTALL_INCLUDE_DIR}") set(ITK3P_INSTALL_RUNTIME_DIR "${ITKLIBLBFGS_INSTALL_RUNTIME_DIR}") set(ITK3P_INSTALL_LIBRARY_DIR "${ITKLIBLBFGS_INSTALL_LIBRARY_DIR}") set(ITK3P_INSTALL_ARCHIVE_DIR "${ITKLIBLBFGS_INSTALL_ARCHIVE_DIR}") - -IF(UNIX) - TARGET_LINK_LIBRARIES(itklbfgs m) -ENDIF(UNIX) - -INSTALL(TARGETS itklbfgs - EXPORT ${ITK3P_INSTALL_EXPORT_NAME} - RUNTIME DESTINATION ${ITK3P_INSTALL_RUNTIME_DIR} COMPONENT RuntimeLibraries - LIBRARY DESTINATION ${ITK3P_INSTALL_LIBRARY_DIR} COMPONENT RuntimeLibraries - ARCHIVE DESTINATION ${ITK3P_INSTALL_ARCHIVE_DIR} COMPONENT Development) - +add_subdirectory(itklbfgs) itk_module_target(itklbfgs NO_INSTALL) diff --git a/Modules/ThirdParty/libLBFGS/src/itk_lbfgs.h b/Modules/ThirdParty/libLBFGS/src/itk_lbfgs.h new file mode 100644 index 00000000000..3f55150d779 --- /dev/null +++ b/Modules/ThirdParty/libLBFGS/src/itk_lbfgs.h @@ -0,0 +1,27 @@ +/*========================================================================= + * + * Copyright NumFOCUS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + *=========================================================================*/ +/*========================================================================= + * Include file to wrap the internal ITK libLBFGS library header + *=========================================================================*/ + +#ifndef itk_lbfgs_h +#define itk_lbfgs_h + +#include + +#endif diff --git a/Modules/ThirdParty/libLBFGS/src/itklbfgs/CMakeLists.txt b/Modules/ThirdParty/libLBFGS/src/itklbfgs/CMakeLists.txt new file mode 100644 index 00000000000..41007f09388 --- /dev/null +++ b/Modules/ThirdParty/libLBFGS/src/itklbfgs/CMakeLists.txt @@ -0,0 +1,20 @@ +add_library(itklbfgs lib/lbfgs.c) +target_include_directories(itklbfgs PRIVATE include) + +if(ITK_LIBRARY_PROPERTIES) + set_target_properties(itklbfgs PROPERTIES ${ITK_LIBRARY_PROPERTIES}) +endif() + +if(UNIX) + target_link_libraries(itklbfgs m) +endif() + +install(TARGETS itklbfgs + EXPORT ${ITK3P_INSTALL_EXPORT_NAME} + RUNTIME DESTINATION ${ITK3P_INSTALL_RUNTIME_DIR} COMPONENT RuntimeLibraries + LIBRARY DESTINATION ${ITK3P_INSTALL_LIBRARY_DIR} COMPONENT RuntimeLibraries + ARCHIVE DESTINATION ${ITK3P_INSTALL_ARCHIVE_DIR} COMPONENT Development) + +install(FILES + ${CMAKE_CURRENT_SOURCE_DIR}/include/lbfgs.h + DESTINATION ${ITK3P_INSTALL_INCLUDE_DIR}/itklbfgs/include)