
vigra/regression.hxx

/************************************************************************/
/*                                                                      */
/*                  Copyright 2008 by Ullrich Koethe                    */
/*                                                                      */
/*    This file is part of the VIGRA computer vision library.           */
/*    The VIGRA Website is                                              */
/*        http://hci.iwr.uni-heidelberg.de/vigra/                       */
/*    Please direct questions, bug reports, and contributions to        */
/*        ullrich.koethe@iwr.uni-heidelberg.de    or                    */
/*        vigra@informatik.uni-hamburg.de                               */
/*                                                                      */
/*    Permission is hereby granted, free of charge, to any person       */
/*    obtaining a copy of this software and associated documentation    */
/*    files (the "Software"), to deal in the Software without           */
/*    restriction, including without limitation the rights to use,      */
/*    copy, modify, merge, publish, distribute, sublicense, and/or      */
/*    sell copies of the Software, and to permit persons to whom the    */
/*    Software is furnished to do so, subject to the following          */
/*    conditions:                                                       */
/*                                                                      */
/*    The above copyright notice and this permission notice shall be    */
/*    included in all copies or substantial portions of the             */
/*    Software.                                                         */
/*                                                                      */
/*    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND    */
/*    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES   */
/*    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND          */
/*    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT       */
/*    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,      */
/*    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING      */
/*    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR     */
/*    OTHER DEALINGS IN THE SOFTWARE.                                   */
/*                                                                      */
/************************************************************************/


#ifndef VIGRA_REGRESSION_HXX
#define VIGRA_REGRESSION_HXX

#include "matrix.hxx"
#include "linear_solve.hxx"
#include "singular_value_decomposition.hxx"
#include "numerictraits.hxx"
#include "functorexpression.hxx"


namespace vigra
{

namespace linalg
{

/** \addtogroup Optimization Optimization and Regression
 */
//@{
   /** Ordinary Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>)
       and a column vector \a b of length <tt>m</tt>, this function computes
       the column vector \a x of length <tt>n</tt> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
        \f]

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       This function is just another name for \ref linearSolve(), perhaps
       leading to more readable code when \a A is a rectangular matrix. It returns
       <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
       See \ref linearSolve() for more documentation.
    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
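
       <b>Usage (a minimal sketch; the shapes of \a A and \a b are illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        if(!leastSquares(A, b, x))   // solve A*x = b in the least squares sense
            std::cerr << "A is rank deficient\n";
        \endcode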
   */
template <class T, class C1, class C2, class C3>
inline bool
leastSquares(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
             std::string method = "QR")
{
    return linearSolve(A, b, x, method);
}

   /** Weighted Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
       with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
        \f]

       where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
       The algorithm calls \ref leastSquares() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
                  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
        \f]

       where the square root of \a weights is just taken element-wise.

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns
       <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
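
       <b>Usage (a minimal sketch; shapes and weights are illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and the non-negative weights

        if(!weightedLeastSquares(A, b, weights, x))
            std::cerr << "weighted matrix is rank deficient\n";
        \endcode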
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
             MultiArrayView<2, T, C4> &x, std::string method = "QR")
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedLeastSquares(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedLeastSquares(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedLeastSquares(): Result matrix x has wrong shape.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedLeastSquares(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return leastSquares(wa, wb, x, method);
}

   /** Ridge Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
       this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
            \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       This is implemented by means of \ref singularValueDecomposition().

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
       and <tt>lambda == 0.0</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
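
       <b>Usage (a minimal sketch; the value of <tt>lambda</tt> is illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        // lambda > 0 regularizes the problem; with lambda == 0.0 the call
        // returns false when A is rank deficient
        ridgeRegression(A, b, x, 1e-3);
        \endcode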
   */
template <class T, class C1, class C2, class C3>
bool
ridgeRegression(MultiArrayView<2, T, C1> const & A,
                MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "ridgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "ridgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "ridgeRegression(): lambda >= 0.0 required.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);
    if(rank < n && lambda == 0.0)
        return false;

    Matrix<T> t = transpose(u)*b;
    for(unsigned int k=0; k<cols; ++k)
        for(unsigned int l=0; l<rhsCount; ++l)
            t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
    x = v*t;
    return true;
}

   /** Weighted Ridge Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
       a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
       with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
       this function computes the vector \a x of length <tt>n</tt>
       that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
             \textrm{diag}(\textrm{\bf weights})
             \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
       The algorithm calls \ref ridgeRegression() on the equivalent problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
                  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
             \lambda \textrm{\bf x}^T\textrm{\bf x}
        \f]

       where the square root of \a weights is just taken element-wise. This solution is
       computed by means of \ref singularValueDecomposition().

       When \a b is a matrix with <tt>k</tt> columns, \a x must also have
       <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
       \a b. Note that all matrices must already have the correct shape.

       The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
       and <tt>lambda == 0.0</tt>.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
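
       <b>Usage (a minimal sketch; shapes, weights, and <tt>lambda</tt> are illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1), weights(m, 1), x(n, 1);
        ... // fill A, b, and the non-negative weights

        weightedRidgeRegression(A, b, weights, x, 1e-3);
        \endcode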
   */
template <class T, class C1, class C2, class C3, class C4>
bool
weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
             MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
             MultiArrayView<2, T, C4> &x, double lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int rhsCount = columnCount(b);
    vigra_precondition(rows >= cols,
       "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows,
       "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
       "weightedRidgeRegression(): Weight matrix has wrong shape.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
       "weightedRidgeRegression(): Result matrix x has wrong shape.");
    vigra_precondition(lambda >= 0.0,
       "weightedRidgeRegression(): lambda >= 0.0 required.");

    Matrix<T> wa(A.shape()), wb(b.shape());

    for(unsigned int k=0; k<rows; ++k)
    {
        vigra_precondition(weights(k,0) >= 0,
           "weightedRidgeRegression(): Weights must be non-negative.");
        T w = std::sqrt(weights(k,0));
        for(unsigned int l=0; l<cols; ++l)
            wa(k,l) = w * A(k,l);
        for(unsigned int l=0; l<rhsCount; ++l)
            wb(k,l) = w * b(k,l);
    }

    return ridgeRegression(wa, wb, x, lambda);
}

   /** Ridge Regression with many lambdas.

       This executes \ref ridgeRegression() for a sequence of regularization parameters. This
       is implemented so that the \ref singularValueDecomposition() has to be executed only once.
       \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. it must
       support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
       will contain the solutions for the corresponding lambda, so the number of columns of
       the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
       i.e. it cannot contain several right hand sides at once.

       The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
       happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
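
       <b>Usage (a minimal sketch; the lambda values are illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1);
        ... // fill A and b

        ArrayVector<double> lambda;
        lambda.push_back(0.1);
        lambda.push_back(1.0);
        lambda.push_back(10.0);

        Matrix<double> x(n, lambda.size());   // one column per lambda
        ridgeRegressionSeries(A, b, x, lambda);
        \endcode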
   */
template <class T, class C1, class C2, class C3, class Array>
bool
ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
          MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
{
    typedef T Real;

    const unsigned int rows = rowCount(A);
    const unsigned int cols = columnCount(A);
    const unsigned int lambdaCount = lambda.size();
    vigra_precondition(rows >= cols,
       "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
    vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
       "ridgeRegressionSeries(): Result matrix x has wrong shape.");

    unsigned int m = rows;
    unsigned int n = cols;

    Matrix<T> u(m, n), s(n, 1), v(n, n);

    unsigned int rank = singularValueDecomposition(A, u, s, v);

    Matrix<T> xl = transpose(u)*b;
    Matrix<T> xt(cols,1);
    for(unsigned int i=0; i<lambdaCount; ++i)
    {
        vigra_precondition(lambda[i] >= 0.0,
           "ridgeRegressionSeries(): lambda >= 0.0 required.");
        if(lambda[i] == 0.0 && rank < cols) // A is rank deficient => skip this column, as documented
            continue;
        for(unsigned int k=0; k<cols; ++k)
            xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
        columnVector(x, i) = v*xt;
    }
    return (rank == n);
}

/** \brief Pass options to leastAngleRegression().

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
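
    Options are set by chaining the member functions below, e.g. (an illustrative
    combination, not a recommended default):

    \code
    LeastAngleRegressionOptions()
        .nnlasso()                    // non-negative LASSO mode
        .maxSolutionCount(10)         // compute at most 10 solutions
        .leastSquaresSolutions(false);
    \endcode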
*/
class LeastAngleRegressionOptions
{
  public:
    enum Mode { LARS, LASSO, NNLASSO };

        /** Initialize all options with default values.
        */
    LeastAngleRegressionOptions()
    : max_solution_count(0),
      unconstrained_dimension_count(0),
      mode(LASSO),
      least_squares_solutions(true)
    {}

        /** Maximum number of solutions to be computed.

            If \a n is 0 (the default), the number of solutions is determined by the length
            of the solution array. Otherwise, the minimum of maxSolutionCount() and that
            length is taken.<br>
            Default: 0 (use length of solution array)
        */
    LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
    {
        max_solution_count = (int)n;
        return *this;
    }

        /** Set the mode of the algorithm.

            Mode must be one of "lars", "lasso", "nnlasso". The function just calls
            the member function of the corresponding name to set the mode.

            Default: "lasso"
        */
    LeastAngleRegressionOptions & setMode(std::string mode)
    {
        for(unsigned int k=0; k<mode.size(); ++k)
            mode[k] = (std::string::value_type)tolower(mode[k]);
        if(mode == "lars")
            this->lars();
        else if(mode == "lasso")
            this->lasso();
        else if(mode == "nnlasso")
            this->nnlasso();
        else
            vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
        return *this;
    }


        /** Use the plain LARS algorithm.

            Default: inactive
        */
    LeastAngleRegressionOptions & lars()
    {
        mode = LARS;
        return *this;
    }

        /** Use the LASSO modification of the LARS algorithm.

            This allows features to be removed from the active set under certain conditions.<br>
            Default: active
        */
    LeastAngleRegressionOptions & lasso()
    {
        mode = LASSO;
        return *this;
    }

        /** Use the non-negative LASSO modification of the LARS algorithm.

            This enforces all non-zero entries in the solution to be positive.<br>
            Default: inactive
        */
    LeastAngleRegressionOptions & nnlasso()
    {
        mode = NNLASSO;
        return *this;
    }

        /** Compute least squares solutions.

            Use least angle regression to determine active sets, but
            return least squares solutions for the features in each active set,
            instead of constrained solutions.<br>
            Default: <tt>true</tt>
        */
    LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
    {
        least_squares_solutions = select;
        return *this;
    }

    int max_solution_count, unconstrained_dimension_count;
    Mode mode;
    bool least_squares_solutions;
};

namespace detail {

template <class T, class C1, class C2>
struct LarsData
{
    typedef typename MultiArrayShape<2>::type Shape;

    int activeSetSize;
    MultiArrayView<2, T, C1> A;
    MultiArrayView<2, T, C2> b;
    Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
    ArrayVector<MultiArrayIndex> columnPermutation;

    // init data for a new run
    LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
    : activeSetSize(1),
      A(Ai), b(bi), R(A), qtb(b),
      lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
      next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }

    // copy data for the recursive call in nnlassolsq
    LarsData(LarsData const & d, int asetSize)
    : activeSetSize(asetSize),
      A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
      lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
      next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
      next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
      columnPermutation(A.shape(1))
    {
        for(unsigned int k=0; k<columnPermutation.size(); ++k)
            columnPermutation[k] = k;
    }
};

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
                  Array1 & activeSets, Array2 * lars_solutions, Array2 * lsq_solutions,
                  LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    typedef typename MultiArrayShape<2>::type Shape;
    typedef typename Matrix<T>::view_type Subarray;
    typedef ArrayVector<MultiArrayIndex> Permutation;
    typedef typename Permutation::view_type ColumnSet;

    vigra_precondition(d.activeSetSize > 0,
       "leastAngleRegressionMainLoop() must not be called with empty active set.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
    bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);

    const MultiArrayIndex rows = rowCount(d.R);
    const MultiArrayIndex cols = columnCount(d.R);
    const MultiArrayIndex maxRank = std::min(rows, cols);

    MultiArrayIndex maxSolutionCount = options.max_solution_count;
    if(maxSolutionCount == 0)
        maxSolutionCount = lasso_modification
                                ? 10*maxRank
                                : maxRank;

    bool needToRemoveColumn = false;
    MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
    MultiArrayIndex currentSolutionCount = 0;
    while(currentSolutionCount < maxSolutionCount)
    {
        ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
        ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);

        // find next dimension to be activated
        Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction),      // correlation with LARS residual
                  cLSQ  = transpose(d.A) * (d.b - d.next_lsq_prediction);  // correlation with LSQ residual

        // In theory, all vectors in the active set should have the same correlation C, and
        // the correlation of all others should not exceed this. In practice, we may find the
        // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
        // determine C from the entire set of variables.
        MultiArrayIndex cmaxIndex = enforce_positive
                                      ? argMax(cLARS)
                                      : argMax(abs(cLARS));
        T C = abs(cLARS(cmaxIndex, 0));

        Matrix<T> ac(cols - d.activeSetSize, 1);
        for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
        {
            T rho = cLSQ(inactiveSet[k], 0),
              cc  = C - sign(rho)*cLARS(inactiveSet[k], 0);

            if(rho == 0.0)  // make sure that 0/0 cannot happen in the other cases
                ac(k,0) = 1.0; // variable k is linearly dependent on the active set
            else if(rho > 0.0)
                ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
            else if(enforce_positive)
                ac(k,0) = 1.0; // variable k cannot enter the active set because it would be negative
            else
                ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
        }

        // in the non-negative case: make sure that a column just removed cannot re-enter right away
        // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
        if(enforce_positive && needToRemoveColumn)
            ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;

        // find candidate
        // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
        //       join the active set simultaneously, so that gamma = 0 cannot occur.
        columnToBeAdded = argMin(ac);

        // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
        T gamma = (d.activeSetSize == maxRank)
                     ? 1.0
                     : ac(columnToBeAdded, 0);

        // adjust columnToBeAdded: we skipped the active set
        if(columnToBeAdded >= 0)
            columnToBeAdded += d.activeSetSize;

        // check whether we have to remove a column from the active set
        needToRemoveColumn = false;
        if(lasso_modification)
        {
            // find dimensions whose weight changes sign below gamma*searchDirection
            Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
            for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            {
                if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
                   (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
                    s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
            }

            columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
            if(columnToBeRemoved >= 0)
            {
                needToRemoveColumn = true; // remove takes precedence over add
                gamma = s(columnToBeRemoved, 0);
            }
        }

        // compute the current solutions
        d.lars_prediction  = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
        d.lars_solution    = gamma * d.next_lsq_solution   + (1.0 - gamma) * d.lars_solution;
        if(needToRemoveColumn)
            d.lars_solution(columnToBeRemoved, 0) = 0.0;  // turn possible epsilon into an exact zero

        // write the current solution
        ++currentSolutionCount;
        activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));

        if(lsq_solutions != 0)
        {
            if(enforce_positive)
            {
                ArrayVector<Matrix<T> > nnresults;
                ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
                LarsData<T, C1, C2> nnd(d, d.activeSetSize);

                leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array2*)0,
                                             LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
                Matrix<T> nnlsq_solution(d.activeSetSize, 1);
                for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
                {
                    nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
                }
                lsq_solutions->push_back(nnlsq_solution);
            }
            else
            {
                lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
            }
        }
        if(lars_solutions != 0)
        {
            lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
        }

        // no further solutions possible
        if(gamma == 1.0)
            break;

        if(needToRemoveColumn)
        {
            --d.activeSetSize;
            if(columnToBeRemoved != d.activeSetSize)
            {
                // remove column 'columnToBeRemoved' and restore triangular form of R
                // note: columnPermutation is automatically swapped here
                detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);

                // swap solution entries
                std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
                std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
                columnToBeRemoved = d.activeSetSize; // keep track of removed column
            }
            d.lars_solution(d.activeSetSize,0) = 0.0;
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
        }
        else
        {
            vigra_invariant(columnToBeAdded >= 0,
                "leastAngleRegression(): internal error (columnToBeAdded < 0)");
            // add column 'columnToBeAdded'
            if(d.activeSetSize != columnToBeAdded)
            {
                std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
                columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
                columnToBeAdded = d.activeSetSize; // keep track of added column
            }

            // zero the corresponding entries of the solutions
            d.next_lsq_solution(d.activeSetSize,0) = 0.0;
            d.lars_solution(d.activeSetSize,0) = 0.0;

            // reduce R (i.e. its newly added column) to triangular form
            detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
            ++d.activeSetSize;
        }

        // compute the LSQ solution of the new active set
        Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
        Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
        linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);

        // compute the LSQ prediction of the new active set
        d.next_lsq_prediction.init(0.0);
        for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
            d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
    }

    return (unsigned int)currentSolutionCount;
}

template <class T, class C1, class C2, class Array1, class Array2>
unsigned int
leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                         Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
                         LeastAngleRegressionOptions const & options)
{
    using namespace vigra::functor;

    const MultiArrayIndex rows = rowCount(A);

    vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
       "leastAngleRegression(): Shape mismatch between matrices A and b.");

    bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);

    detail::LarsData<T, C1, C2> d(A, b);

    // find dimension with largest correlation
    Matrix<T> c = transpose(A)*b;
    MultiArrayIndex initialColumn = enforce_positive
                                       ? argMaxIf(c, Arg1() > Param(0.0))
                                       : argMax(abs(c));
    if(initialColumn == -1)
        return 0; // no solution found

    // prepare initial active set and search direction etc.
    std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
    columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
    detail::qrColumnHouseholderStep(0, d.R, d.qtb);
    d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
    d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
    d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);

    return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
}

} // namespace detail

   /** Least Angle Regression.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg

   <b> Declarations:</b>

    \code
    namespace vigra {
      namespace linalg {
        // compute either LASSO or least squares solutions
        template <class T, class C1, class C2, class Array1, class Array2>
        unsigned int
        leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                             Array1 & activeSets, Array2 & solutions,
                             LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());

        // compute LASSO and least squares solutions
        template <class T, class C1, class C2, class Array1, class Array2>
        unsigned int
        leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                             Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                             LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
      }
      using linalg::leastAngleRegression;
    }
    \endcode

       This function implements Least Angle Regression (LARS) as described in

       &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
       B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <em>"Least Angle Regression"</em>,
       Annals of Statistics 32(2):407-499, 2004.

       It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
             \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
           \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
        \f]

       and the L1-regularized non-negative least squares (NN-LASSO) problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
           \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
        \f]

       where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
       \a b is a vector of length <tt>m</tt>, and <tt>s >= 0.0</tt> is a regularization parameter.
       L1-regularization has the desirable effect that it causes the solution \a x to be sparse, i.e. only
       the most important variables (called the <em>active set</em>) have non-zero values. The
       key insight of the LARS algorithm is the following: When the solution vector is considered
       as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
       linear function, i.e. a polyline in n-dimensional space. The knots of the polyline
       occur precisely at those values of s where one variable enters or leaves the active set,
       and can be efficiently computed.

       Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
       at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
       \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
       the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
       solution with one variable in the active set. The function leastAngleRegression() returns the number
       of solutions (i.e. knot points) computed.

       The sequences of active sets and corresponding variable weights are returned in \a activeSets and
       \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
       containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
       \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
       example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.

       The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
       "LeastAngleRegressionOptions":
        <DL>
        <DT><b>options.lasso()</b> (active by default)
                          <DD> Compute the LASSO solution as described above.
        <DT><b>options.nnlasso()</b> (inactive by default)
                          <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
                               <b>x</b> >= 0 in all solutions.
        <DT><b>options.lars()</b> (inactive by default)
                          <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
                               a variable from the active set once it entered.
        <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
                          <DD> Use the algorithm mode selected above
                               to determine the sequence of active sets, but then compute and return an
                               ordinary (unconstrained) least squares solution for every active set.<br>
                               <b>Note:</b> The second form of leastAngleRegression() ignores this option and
                               always computes both constrained and unconstrained solutions (returned in
                               \a lasso_solutions and \a lsq_solutions respectively).
        <DT><b>options.maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
                          <DD> Compute at most <tt>n</tt> solutions.
        </DL>

        <b>Usage:</b>

        \code
        int m = ..., n = ...;
        Matrix<double> A(m, n), b(m, 1);
        ... // fill A and b

        // normalize the input
        Matrix<double> offset(1,n), scaling(1,n);
        prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
        prepareColumns(b, b, DataPreparationGoals(ZeroMean));

        // arrays to hold the output
        ArrayVector<ArrayVector<int> > activeSets;
        ArrayVector<Matrix<double> > solutions;

        // run leastAngleRegression() in non-negative LASSO mode
        int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
                                    LeastAngleRegressionOptions().nnlasso());

        // print results
        Matrix<double> denseSolution(1, n);
        for (MultiArrayIndex k = 0; k < numSolutions; ++k)
        {
            // transform the sparse solution into a dense vector
            denseSolution.init(0.0); // ensure that inactive variables are zero
            for (unsigned int i = 0; i < activeSets[k].size(); ++i)
            {
                // set the values of the active variables;
                // activeSets[k][i] is the true index of the i-th variable in the active set
                denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
            }

            // invert the input normalization
            denseSolution = denseSolution * pointWise(scaling);

            // output the solution
            std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
        }
        \endcode
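
        The second form of leastAngleRegression() returns the constrained and the unconstrained
        solutions of each active set simultaneously. A minimal sketch, reusing \c A and \c b
        from above:

        \code
        ArrayVector<ArrayVector<int> > activeSets2;
        ArrayVector<Matrix<double> > lassoSolutions, lsqSolutions;

        // lassoSolutions[k] and lsqSolutions[k] both refer to the active set activeSets2[k]
        leastAngleRegression(A, b, activeSets2, lassoSolutions, lsqSolutions,
                             LeastAngleRegressionOptions().lasso());
        \endcode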

        <b>Required Interface:</b>

        <ul>
        <li> <tt>T</tt> must be a numeric type (compatible to double)
        <li> <tt>Array1 a1;</tt><br>
             <tt>a1.push_back(ArrayVector<int>());</tt>
        <li> <tt>Array2 a2;</tt><br>
             <tt>a2.push_back(Matrix<T>());</tt>
        </ul>
   */
doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    if(options.least_squares_solutions)
        return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
    else
        return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
}

template <class T, class C1, class C2, class Array1, class Array2>
inline unsigned int
leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
                     Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
                     LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
{
    return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
}

   /** Non-negative Least Squares Regression.

       Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>)
       and a column vector \a b of length <tt>m</tt>, this function computes
       a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem

        \f[ \tilde \textrm{\bf x} = \textrm{argmin}
            \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
            \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
        \f]

       Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
       Note that all matrices must already have the correct shape. The solution is computed by means
       of \ref leastAngleRegression() with non-negativity constraint.

    <b>\#include</b> <vigra/regression.hxx>
        Namespaces: vigra and vigra::linalg
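
       <b>Usage (a minimal sketch; shapes are illustrative):</b>

        \code
        int m = ..., n = ...;   // m >= n
        Matrix<double> A(m, n), b(m, 1), x(n, 1);
        ... // fill A and b

        nonnegativeLeastSquares(A, b, x);   // on return, all entries of x are >= 0
        \endcode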
   */
template <class T, class C1, class C2, class C3>
inline void
nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
                        MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
{
    vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
        "nonnegativeLeastSquares(): Matrix shape mismatch.");
    vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
        "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");

    ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
    ArrayVector<Matrix<T> > results;

    leastAngleRegression(A, b, activeSets, results,
                         LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
    x.init(NumericTraits<T>::zero());
    if(activeSets.size() > 0)
        for(unsigned int k=0; k<activeSets.back().size(); ++k)
            x(activeSets.back()[k],0) = results.back()[k];
}


//@}

} // namespace linalg

using linalg::leastSquares;
using linalg::weightedLeastSquares;
using linalg::ridgeRegression;
using linalg::weightedRidgeRegression;
using linalg::ridgeRegressionSeries;
using linalg::nonnegativeLeastSquares;
using linalg::leastAngleRegression;
using linalg::LeastAngleRegressionOptions;

} // namespace vigra

#endif // VIGRA_REGRESSION_HXX
