My Project  UNKNOWN_GIT_VERSION
bdsvd.h
Go to the documentation of this file.
1 /*************************************************************************
2 Copyright (c) 1992-2007 The University of Tennessee. All rights reserved.
3 
4 Contributors:
5  * Sergey Bochkanov (ALGLIB project). Translation from FORTRAN to
6  pseudocode.
7 
8 See subroutines comments for additional copyrights.
9 
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are
12 met:
13 
14 - Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16 
17 - Redistributions in binary form must reproduce the above copyright
18  notice, this list of conditions and the following disclaimer listed
19  in this license in the documentation and/or other materials
20  provided with the distribution.
21 
22 - Neither the name of the copyright holders nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 *************************************************************************/
38 
39 #ifndef _bdsvd_h
40 #define _bdsvd_h
41 
42 #include "ap.h"
43 #include "amp.h"
44 #include "rotations.h"
45 namespace bdsvd
46 {
47  template<unsigned int Precision>
50  int n,
51  bool isupper,
52  bool isfractionalaccuracyrequired,
54  int nru,
56  int ncc,
58  int ncvt);
59  template<unsigned int Precision>
62  int n,
63  bool isupper,
64  bool isfractionalaccuracyrequired,
66  int nru,
68  int ncc,
70  int ncvt);
71  template<unsigned int Precision>
74  int n,
75  bool isupper,
76  bool isfractionalaccuracyrequired,
78  int ustart,
79  int nru,
81  int cstart,
82  int ncc,
84  int vstart,
85  int ncvt);
86  template<unsigned int Precision>
89  template<unsigned int Precision>
93  amp::ampf<Precision>& ssmin,
94  amp::ampf<Precision>& ssmax);
95  template<unsigned int Precision>
99  amp::ampf<Precision>& ssmin,
100  amp::ampf<Precision>& ssmax,
104  amp::ampf<Precision>& csl);
105 
106 
107  /*************************************************************************
108  Singular value decomposition of a bidiagonal matrix (extended algorithm)
109 
110  The algorithm performs the singular value decomposition of a bidiagonal
111  matrix B (upper or lower) representing it as B = Q*S*P^T, where Q and P -
112  orthogonal matrices, S - diagonal matrix with non-negative elements on the
113  main diagonal, in descending order.
114 
115  The algorithm finds singular values. In addition, the algorithm can
116  calculate matrices Q and P (more precisely, not the matrices, but their
117  product with given matrices U and VT - U*Q and (P^T)*VT)). Of course,
118  matrices U and VT can be of any type, including identity. Furthermore, the
119  algorithm can calculate Q'*C (this product is calculated more effectively
120  than U*Q, because this calculation operates with rows instead of matrix
121  columns).
122 
123  The feature of the algorithm is its ability to find all singular values
124  including those which are arbitrarily close to 0 with relative accuracy
125  close to machine precision. If the parameter IsFractionalAccuracyRequired
126  is set to True, all singular values will have high relative accuracy close
127  to machine precision. If the parameter is set to False, only the biggest
128  singular value will have relative accuracy close to machine precision.
129  The absolute error of other singular values is equal to the absolute error
130  of the biggest singular value.
131 
132  Input parameters:
133  D - main diagonal of matrix B.
134  Array whose index ranges within [0..N-1].
135  E - superdiagonal (or subdiagonal) of matrix B.
136  Array whose index ranges within [0..N-2].
137  N - size of matrix B.
138  IsUpper - True, if the matrix is upper bidiagonal.
139  IsFractionalAccuracyRequired -
140  accuracy to search singular values with.
141  U - matrix to be multiplied by Q.
142  Array whose indexes range within [0..NRU-1, 0..N-1].
143  The matrix can be bigger, in that case only the submatrix
144  [0..NRU-1, 0..N-1] will be multiplied by Q.
145  NRU - number of rows in matrix U.
146  C - matrix to be multiplied by Q'.
147  Array whose indexes range within [0..N-1, 0..NCC-1].
148  The matrix can be bigger, in that case only the submatrix
149  [0..N-1, 0..NCC-1] will be multiplied by Q'.
150  NCC - number of columns in matrix C.
151  VT - matrix to be multiplied by P^T.
152  Array whose indexes range within [0..N-1, 0..NCVT-1].
153  The matrix can be bigger, in that case only the submatrix
154  [0..N-1, 0..NCVT-1] will be multiplied by P^T.
155  NCVT - number of columns in matrix VT.
156 
157  Output parameters:
158  D - singular values of matrix B in descending order.
159  U - if NRU>0, contains matrix U*Q.
160  VT - if NCVT>0, contains matrix (P^T)*VT.
161  C - if NCC>0, contains matrix Q'*C.
162 
163  Result:
164  True, if the algorithm has converged.
165  False, if the algorithm hasn't converged (rare case).
166 
167  Additional information:
168  The type of convergence is controlled by the internal parameter TOL.
169  If the parameter is greater than 0, the singular values will have
170  relative accuracy TOL. If TOL<0, the singular values will have
171  absolute accuracy ABS(TOL)*norm(B).
172  By default, |TOL| falls within the range of 10*Epsilon and 100*Epsilon,
173  where Epsilon is the machine precision. It is not recommended to use
174  TOL less than 10*Epsilon since this will considerably slow down the
175  algorithm and may not lead to error decreasing.
176  History:
177  * 31 March, 2007.
178  changed MAXITR from 6 to 12.
179 
180  -- LAPACK routine (version 3.0) --
181  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
182  Courant Institute, Argonne National Lab, and Rice University
183  October 31, 1999.
184  *************************************************************************/
185  template<unsigned int Precision>
188  int n,
189  bool isupper,
190  bool isfractionalaccuracyrequired,
192  int nru,
194  int ncc,
196  int ncvt)
197  {
198  bool result;
201 
202 
203  d1.setbounds(1, n);
204  ap::vmove(d1.getvector(1, n), d.getvector(0, n-1));
205  if( n>1 )
206  {
207  e1.setbounds(1, n-1);
208  ap::vmove(e1.getvector(1, n-1), e.getvector(0, n-2));
209  }
210  result = bidiagonalsvddecompositioninternal<Precision>(d1, e1, n, isupper, isfractionalaccuracyrequired, u, 0, nru, c, 0, ncc, vt, 0, ncvt);
211  ap::vmove(d.getvector(0, n-1), d1.getvector(1, n));
212  return result;
213  }
214 
215 
216  /*************************************************************************
217  Obsolete 1-based subroutine. See RMatrixBDSVD for 0-based replacement.
218 
219  History:
220  * 31 March, 2007.
221  changed MAXITR from 6 to 12.
222 
223  -- LAPACK routine (version 3.0) --
224  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
225  Courant Institute, Argonne National Lab, and Rice University
226  October 31, 1999.
227  *************************************************************************/
228  template<unsigned int Precision>
231  int n,
232  bool isupper,
233  bool isfractionalaccuracyrequired,
235  int nru,
237  int ncc,
239  int ncvt)
240  {
241  bool result;
242 
243 
244  result = bidiagonalsvddecompositioninternal<Precision>(d, e, n, isupper, isfractionalaccuracyrequired, u, 1, nru, c, 1, ncc, vt, 1, ncvt);
245  return result;
246  }
247 
248 
249  /*************************************************************************
250  Internal working subroutine for bidiagonal decomposition
251  *************************************************************************/
252  template<unsigned int Precision>
255  int n,
256  bool isupper,
257  bool isfractionalaccuracyrequired,
259  int ustart,
260  int nru,
262  int cstart,
263  int ncc,
265  int vstart,
266  int ncvt)
267  {
268  bool result;
269  int i;
270  int idir;
271  int isub;
272  int iter;
273  int j;
274  int ll;
275  int lll;
276  int m;
277  int maxit;
278  int oldll;
279  int oldm;
290  amp::ampf<Precision> oldcs;
291  amp::ampf<Precision> oldsn;
293  amp::ampf<Precision> shift;
294  amp::ampf<Precision> sigmn;
295  amp::ampf<Precision> sigmx;
301  amp::ampf<Precision> sminl;
302  amp::ampf<Precision> sminlo;
303  amp::ampf<Precision> sminoa;
305  amp::ampf<Precision> thresh;
307  amp::ampf<Precision> tolmul;
313  int maxitr;
314  bool matrixsplitflag;
315  bool iterflag;
320  bool rightside;
321  bool fwddir;
323  int mm1;
324  int mm0;
325  bool bchangedir;
326  int uend;
327  int cend;
328  int vend;
329 
330 
331  result = true;
332  if( n==0 )
333  {
334  return result;
335  }
336  if( n==1 )
337  {
338  if( d(1)<0 )
339  {
340  d(1) = -d(1);
341  if( ncvt>0 )
342  {
343  ap::vmul(vt.getrow(vstart, vstart, vstart+ncvt-1), -1);
344  }
345  }
346  return result;
347  }
348 
349  //
350  // init
351  //
352  work0.setbounds(1, n-1);
353  work1.setbounds(1, n-1);
354  work2.setbounds(1, n-1);
355  work3.setbounds(1, n-1);
356  uend = ustart+ap::maxint(nru-1, 0);
357  vend = vstart+ap::maxint(ncvt-1, 0);
358  cend = cstart+ap::maxint(ncc-1, 0);
359  utemp.setbounds(ustart, uend);
360  vttemp.setbounds(vstart, vend);
361  ctemp.setbounds(cstart, cend);
362  maxitr = 12;
363  rightside = true;
364  fwddir = true;
365 
366  //
367  // resize E from N-1 to N
368  //
369  etemp.setbounds(1, n);
370  for(i=1; i<=n-1; i++)
371  {
372  etemp(i) = e(i);
373  }
374  e.setbounds(1, n);
375  for(i=1; i<=n-1; i++)
376  {
377  e(i) = etemp(i);
378  }
379  e(n) = 0;
380  idir = 0;
381 
382  //
383  // Get machine constants
384  //
387 
388  //
389  // If matrix lower bidiagonal, rotate to be upper bidiagonal
390  // by applying Givens rotations on the left
391  //
392  if( !isupper )
393  {
394  for(i=1; i<=n-1; i++)
395  {
396  rotations::generaterotation<Precision>(d(i), e(i), cs, sn, r);
397  d(i) = r;
398  e(i) = sn*d(i+1);
399  d(i+1) = cs*d(i+1);
400  work0(i) = cs;
401  work1(i) = sn;
402  }
403 
404  //
405  // Update singular vectors if desired
406  //
407  if( nru>0 )
408  {
409  rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp);
410  }
411  if( ncc>0 )
412  {
413  rotations::applyrotationsfromtheleft<Precision>(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp);
414  }
415  }
416 
417  //
418  // Compute singular values to relative accuracy TOL
419  // (By setting TOL to be negative, algorithm will compute
420  // singular values to absolute accuracy ABS(TOL)*norm(input matrix))
421  //
422  tolmul = amp::maximum<Precision>(10, amp::minimum<Precision>(100, amp::pow<Precision>(eps, -amp::ampf<Precision>("0.125"))));
423  tol = tolmul*eps;
424  if( !isfractionalaccuracyrequired )
425  {
426  tol = -tol;
427  }
428 
429  //
430  // Compute approximate maximum, minimum singular values
431  //
432  smax = 0;
433  for(i=1; i<=n; i++)
434  {
435  smax = amp::maximum<Precision>(smax, amp::abs<Precision>(d(i)));
436  }
437  for(i=1; i<=n-1; i++)
438  {
439  smax = amp::maximum<Precision>(smax, amp::abs<Precision>(e(i)));
440  }
441  sminl = 0;
442  if( tol>=0 )
443  {
444 
445  //
446  // Relative accuracy desired
447  //
448  sminoa = amp::abs<Precision>(d(1));
449  if( sminoa!=0 )
450  {
451  mu = sminoa;
452  for(i=2; i<=n; i++)
453  {
454  mu = amp::abs<Precision>(d(i))*(mu/(mu+amp::abs<Precision>(e(i-1))));
455  sminoa = amp::minimum<Precision>(sminoa, mu);
456  if( sminoa==0 )
457  {
458  break;
459  }
460  }
461  }
462  sminoa = sminoa/amp::sqrt<Precision>(n);
463  thresh = amp::maximum<Precision>(tol*sminoa, maxitr*n*n*unfl);
464  }
465  else
466  {
467 
468  //
469  // Absolute accuracy desired
470  //
471  thresh = amp::maximum<Precision>(amp::abs<Precision>(tol)*smax, maxitr*n*n*unfl);
472  }
473 
474  //
475  // Prepare for main iteration loop for the singular values
476  // (MAXIT is the maximum number of passes through the inner
477  // loop permitted before nonconvergence signalled.)
478  //
479  maxit = maxitr*n*n;
480  iter = 0;
481  oldll = -1;
482  oldm = -1;
483 
484  //
485  // M points to last element of unconverged part of matrix
486  //
487  m = n;
488 
489  //
490  // Begin main iteration loop
491  //
492  while( true )
493  {
494 
495  //
496  // Check for convergence or exceeding iteration count
497  //
498  if( m<=1 )
499  {
500  break;
501  }
502  if( iter>maxit )
503  {
504  result = false;
505  return result;
506  }
507 
508  //
509  // Find diagonal block of matrix to work on
510  //
511  if( tol<0 && amp::abs<Precision>(d(m))<=thresh )
512  {
513  d(m) = 0;
514  }
515  smax = amp::abs<Precision>(d(m));
516  smin = smax;
517  matrixsplitflag = false;
518  for(lll=1; lll<=m-1; lll++)
519  {
520  ll = m-lll;
521  abss = amp::abs<Precision>(d(ll));
522  abse = amp::abs<Precision>(e(ll));
523  if( tol<0 && abss<=thresh )
524  {
525  d(ll) = 0;
526  }
527  if( abse<=thresh )
528  {
529  matrixsplitflag = true;
530  break;
531  }
532  smin = amp::minimum<Precision>(smin, abss);
533  smax = amp::maximum<Precision>(smax, amp::maximum<Precision>(abss, abse));
534  }
535  if( !matrixsplitflag )
536  {
537  ll = 0;
538  }
539  else
540  {
541 
542  //
543  // Matrix splits since E(LL) = 0
544  //
545  e(ll) = 0;
546  if( ll==m-1 )
547  {
548 
549  //
550  // Convergence of bottom singular value, return to top of loop
551  //
552  m = m-1;
553  continue;
554  }
555  }
556  ll = ll+1;
557 
558  //
559  // E(LL) through E(M-1) are nonzero, E(LL-1) is zero
560  //
561  if( ll==m-1 )
562  {
563 
564  //
565  // 2 by 2 block, handle separately
566  //
567  svdv2x2<Precision>(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl);
568  d(m-1) = sigmx;
569  e(m-1) = 0;
570  d(m) = sigmn;
571 
572  //
573  // Compute singular vectors, if desired
574  //
575  if( ncvt>0 )
576  {
577  mm0 = m+(vstart-1);
578  mm1 = m-1+(vstart-1);
579  ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(mm1, vstart, vend), cosr);
580  ap::vadd(vttemp.getvector(vstart, vend), vt.getrow(mm0, vstart, vend), sinr);
581  ap::vmul(vt.getrow(mm0, vstart, vend), cosr);
582  ap::vsub(vt.getrow(mm0, vstart, vend), vt.getrow(mm1, vstart, vend), sinr);
583  ap::vmove(vt.getrow(mm1, vstart, vend), vttemp.getvector(vstart, vend));
584  }
585  if( nru>0 )
586  {
587  mm0 = m+ustart-1;
588  mm1 = m-1+ustart-1;
589  ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl);
590  ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl);
591  ap::vmul(u.getcolumn(mm0, ustart, uend), cosl);
592  ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl);
593  ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend));
594  }
595  if( ncc>0 )
596  {
597  mm0 = m+cstart-1;
598  mm1 = m-1+cstart-1;
599  ap::vmove(ctemp.getvector(cstart, cend), c.getrow(mm1, cstart, cend), cosl);
600  ap::vadd(ctemp.getvector(cstart, cend), c.getrow(mm0, cstart, cend), sinl);
601  ap::vmul(c.getrow(mm0, cstart, cend), cosl);
602  ap::vsub(c.getrow(mm0, cstart, cend), c.getrow(mm1, cstart, cend), sinl);
603  ap::vmove(c.getrow(mm1, cstart, cend), ctemp.getvector(cstart, cend));
604  }
605  m = m-2;
606  continue;
607  }
608 
609  //
610  // If working on new submatrix, choose shift direction
611  // (from larger end diagonal element towards smaller)
612  //
613  // Previously was
614  // "if (LL>OLDM) or (M<OLDLL) then"
615  // fixed thanks to Michael Rolle < m@rolle.name >
616  // Very strange that LAPACK still contains it.
617  //
618  bchangedir = false;
619  if( idir==1 && amp::abs<Precision>(d(ll))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(m)) )
620  {
621  bchangedir = true;
622  }
623  if( idir==2 && amp::abs<Precision>(d(m))<amp::ampf<Precision>("1.0E-3")*amp::abs<Precision>(d(ll)) )
624  {
625  bchangedir = true;
626  }
627  if( ll!=oldll || m!=oldm || bchangedir )
628  {
629  if( amp::abs<Precision>(d(ll))>=amp::abs<Precision>(d(m)) )
630  {
631 
632  //
633  // Chase bulge from top (big end) to bottom (small end)
634  //
635  idir = 1;
636  }
637  else
638  {
639 
640  //
641  // Chase bulge from bottom (big end) to top (small end)
642  //
643  idir = 2;
644  }
645  }
646 
647  //
648  // Apply convergence tests
649  //
650  if( idir==1 )
651  {
652 
653  //
654  // Run convergence test in forward direction
655  // First apply standard test to bottom of matrix
656  //
657  if( amp::abs<Precision>(e(m-1))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(m)) || tol<0 && amp::abs<Precision>(e(m-1))<=thresh )
658  {
659  e(m-1) = 0;
660  continue;
661  }
662  if( tol>=0 )
663  {
664 
665  //
666  // If relative accuracy desired,
667  // apply convergence criterion forward
668  //
669  mu = amp::abs<Precision>(d(ll));
670  sminl = mu;
671  iterflag = false;
672  for(lll=ll; lll<=m-1; lll++)
673  {
674  if( amp::abs<Precision>(e(lll))<=tol*mu )
675  {
676  e(lll) = 0;
677  iterflag = true;
678  break;
679  }
680  sminlo = sminl;
681  mu = amp::abs<Precision>(d(lll+1))*(mu/(mu+amp::abs<Precision>(e(lll))));
682  sminl = amp::minimum<Precision>(sminl, mu);
683  }
684  if( iterflag )
685  {
686  continue;
687  }
688  }
689  }
690  else
691  {
692 
693  //
694  // Run convergence test in backward direction
695  // First apply standard test to top of matrix
696  //
697  if( amp::abs<Precision>(e(ll))<=amp::abs<Precision>(tol)*amp::abs<Precision>(d(ll)) || tol<0 && amp::abs<Precision>(e(ll))<=thresh )
698  {
699  e(ll) = 0;
700  continue;
701  }
702  if( tol>=0 )
703  {
704 
705  //
706  // If relative accuracy desired,
707  // apply convergence criterion backward
708  //
709  mu = amp::abs<Precision>(d(m));
710  sminl = mu;
711  iterflag = false;
712  for(lll=m-1; lll>=ll; lll--)
713  {
714  if( amp::abs<Precision>(e(lll))<=tol*mu )
715  {
716  e(lll) = 0;
717  iterflag = true;
718  break;
719  }
720  sminlo = sminl;
721  mu = amp::abs<Precision>(d(lll))*(mu/(mu+amp::abs<Precision>(e(lll))));
722  sminl = amp::minimum<Precision>(sminl, mu);
723  }
724  if( iterflag )
725  {
726  continue;
727  }
728  }
729  }
730  oldll = ll;
731  oldm = m;
732 
733  //
734  // Compute shift. First, test if shifting would ruin relative
735  // accuracy, and if so set the shift to zero.
736  //
737  if( tol>=0 && n*tol*(sminl/smax)<=amp::maximum<Precision>(eps, amp::ampf<Precision>("0.01")*tol) )
738  {
739 
740  //
741  // Use a zero shift to avoid loss of relative accuracy
742  //
743  shift = 0;
744  }
745  else
746  {
747 
748  //
749  // Compute the shift from 2-by-2 block at end of matrix
750  //
751  if( idir==1 )
752  {
753  sll = amp::abs<Precision>(d(ll));
754  svd2x2<Precision>(d(m-1), e(m-1), d(m), shift, r);
755  }
756  else
757  {
758  sll = amp::abs<Precision>(d(m));
759  svd2x2<Precision>(d(ll), e(ll), d(ll+1), shift, r);
760  }
761 
762  //
763  // Test if shift negligible, and if so set to zero
764  //
765  if( sll>0 )
766  {
767  if( amp::sqr<Precision>(shift/sll)<eps )
768  {
769  shift = 0;
770  }
771  }
772  }
773 
774  //
775  // Increment iteration count
776  //
777  iter = iter+m-ll;
778 
779  //
780  // If SHIFT = 0, do simplified QR iteration
781  //
782  if( shift==0 )
783  {
784  if( idir==1 )
785  {
786 
787  //
788  // Chase bulge from top to bottom
789  // Save cosines and sines for later singular vector updates
790  //
791  cs = 1;
792  oldcs = 1;
793  for(i=ll; i<=m-1; i++)
794  {
795  rotations::generaterotation<Precision>(d(i)*cs, e(i), cs, sn, r);
796  if( i>ll )
797  {
798  e(i-1) = oldsn*r;
799  }
800  rotations::generaterotation<Precision>(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp);
801  d(i) = tmp;
802  work0(i-ll+1) = cs;
803  work1(i-ll+1) = sn;
804  work2(i-ll+1) = oldcs;
805  work3(i-ll+1) = oldsn;
806  }
807  h = d(m)*cs;
808  d(m) = h*oldcs;
809  e(m-1) = h*oldsn;
810 
811  //
812  // Update singular vectors
813  //
814  if( ncvt>0 )
815  {
816  rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
817  }
818  if( nru>0 )
819  {
820  rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
821  }
822  if( ncc>0 )
823  {
824  rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
825  }
826 
827  //
828  // Test convergence
829  //
830  if( amp::abs<Precision>(e(m-1))<=thresh )
831  {
832  e(m-1) = 0;
833  }
834  }
835  else
836  {
837 
838  //
839  // Chase bulge from bottom to top
840  // Save cosines and sines for later singular vector updates
841  //
842  cs = 1;
843  oldcs = 1;
844  for(i=m; i>=ll+1; i--)
845  {
846  rotations::generaterotation<Precision>(d(i)*cs, e(i-1), cs, sn, r);
847  if( i<m )
848  {
849  e(i) = oldsn*r;
850  }
851  rotations::generaterotation<Precision>(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp);
852  d(i) = tmp;
853  work0(i-ll) = cs;
854  work1(i-ll) = -sn;
855  work2(i-ll) = oldcs;
856  work3(i-ll) = -oldsn;
857  }
858  h = d(ll)*cs;
859  d(ll) = h*oldcs;
860  e(ll) = h*oldsn;
861 
862  //
863  // Update singular vectors
864  //
865  if( ncvt>0 )
866  {
867  rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
868  }
869  if( nru>0 )
870  {
871  rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
872  }
873  if( ncc>0 )
874  {
875  rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
876  }
877 
878  //
879  // Test convergence
880  //
881  if( amp::abs<Precision>(e(ll))<=thresh )
882  {
883  e(ll) = 0;
884  }
885  }
886  }
887  else
888  {
889 
890  //
891  // Use nonzero shift
892  //
893  if( idir==1 )
894  {
895 
896  //
897  // Chase bulge from top to bottom
898  // Save cosines and sines for later singular vector updates
899  //
900  f = (amp::abs<Precision>(d(ll))-shift)*(extsignbdsqr<Precision>(1, d(ll))+shift/d(ll));
901  g = e(ll);
902  for(i=ll; i<=m-1; i++)
903  {
904  rotations::generaterotation<Precision>(f, g, cosr, sinr, r);
905  if( i>ll )
906  {
907  e(i-1) = r;
908  }
909  f = cosr*d(i)+sinr*e(i);
910  e(i) = cosr*e(i)-sinr*d(i);
911  g = sinr*d(i+1);
912  d(i+1) = cosr*d(i+1);
913  rotations::generaterotation<Precision>(f, g, cosl, sinl, r);
914  d(i) = r;
915  f = cosl*e(i)+sinl*d(i+1);
916  d(i+1) = cosl*d(i+1)-sinl*e(i);
917  if( i<m-1 )
918  {
919  g = sinl*e(i+1);
920  e(i+1) = cosl*e(i+1);
921  }
922  work0(i-ll+1) = cosr;
923  work1(i-ll+1) = sinr;
924  work2(i-ll+1) = cosl;
925  work3(i-ll+1) = sinl;
926  }
927  e(m-1) = f;
928 
929  //
930  // Update singular vectors
931  //
932  if( ncvt>0 )
933  {
934  rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
935  }
936  if( nru>0 )
937  {
938  rotations::applyrotationsfromtheright<Precision>(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
939  }
940  if( ncc>0 )
941  {
942  rotations::applyrotationsfromtheleft<Precision>(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
943  }
944 
945  //
946  // Test convergence
947  //
948  if( amp::abs<Precision>(e(m-1))<=thresh )
949  {
950  e(m-1) = 0;
951  }
952  }
953  else
954  {
955 
956  //
957  // Chase bulge from bottom to top
958  // Save cosines and sines for later singular vector updates
959  //
960  f = (amp::abs<Precision>(d(m))-shift)*(extsignbdsqr<Precision>(1, d(m))+shift/d(m));
961  g = e(m-1);
962  for(i=m; i>=ll+1; i--)
963  {
964  rotations::generaterotation<Precision>(f, g, cosr, sinr, r);
965  if( i<m )
966  {
967  e(i) = r;
968  }
969  f = cosr*d(i)+sinr*e(i-1);
970  e(i-1) = cosr*e(i-1)-sinr*d(i);
971  g = sinr*d(i-1);
972  d(i-1) = cosr*d(i-1);
973  rotations::generaterotation<Precision>(f, g, cosl, sinl, r);
974  d(i) = r;
975  f = cosl*e(i-1)+sinl*d(i-1);
976  d(i-1) = cosl*d(i-1)-sinl*e(i-1);
977  if( i>ll+1 )
978  {
979  g = sinl*e(i-2);
980  e(i-2) = cosl*e(i-2);
981  }
982  work0(i-ll) = cosr;
983  work1(i-ll) = -sinr;
984  work2(i-ll) = cosl;
985  work3(i-ll) = -sinl;
986  }
987  e(ll) = f;
988 
989  //
990  // Test convergence
991  //
992  if( amp::abs<Precision>(e(ll))<=thresh )
993  {
994  e(ll) = 0;
995  }
996 
997  //
998  // Update singular vectors if desired
999  //
1000  if( ncvt>0 )
1001  {
1002  rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
1003  }
1004  if( nru>0 )
1005  {
1006  rotations::applyrotationsfromtheright<Precision>(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
1007  }
1008  if( ncc>0 )
1009  {
1010  rotations::applyrotationsfromtheleft<Precision>(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
1011  }
1012  }
1013  }
1014 
1015  //
1016  // QR iteration finished, go back and check convergence
1017  //
1018  continue;
1019  }
1020 
1021  //
1022  // All singular values converged, so make them positive
1023  //
1024  for(i=1; i<=n; i++)
1025  {
1026  if( d(i)<0 )
1027  {
1028  d(i) = -d(i);
1029 
1030  //
1031  // Change sign of singular vectors, if desired
1032  //
1033  if( ncvt>0 )
1034  {
1035  ap::vmul(vt.getrow(i+vstart-1, vstart, vend), -1);
1036  }
1037  }
1038  }
1039 
1040  //
1041  // Sort the singular values into decreasing order (insertion sort on
1042  // singular values, but only one transposition per singular vector)
1043  //
1044  for(i=1; i<=n-1; i++)
1045  {
1046 
1047  //
1048  // Scan for smallest D(I)
1049  //
1050  isub = 1;
1051  smin = d(1);
1052  for(j=2; j<=n+1-i; j++)
1053  {
1054  if( d(j)<=smin )
1055  {
1056  isub = j;
1057  smin = d(j);
1058  }
1059  }
1060  if( isub!=n+1-i )
1061  {
1062 
1063  //
1064  // Swap singular values and vectors
1065  //
1066  d(isub) = d(n+1-i);
1067  d(n+1-i) = smin;
1068  if( ncvt>0 )
1069  {
1070  j = n+1-i;
1071  ap::vmove(vttemp.getvector(vstart, vend), vt.getrow(isub+vstart-1, vstart, vend));
1072  ap::vmove(vt.getrow(isub+vstart-1, vstart, vend), vt.getrow(j+vstart-1, vstart, vend));
1073  ap::vmove(vt.getrow(j+vstart-1, vstart, vend), vttemp.getvector(vstart, vend));
1074  }
1075  if( nru>0 )
1076  {
1077  j = n+1-i;
1078  ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend));
1079  ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend));
1080  ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend));
1081  }
1082  if( ncc>0 )
1083  {
1084  j = n+1-i;
1085  ap::vmove(ctemp.getvector(cstart, cend), c.getrow(isub+cstart-1, cstart, cend));
1086  ap::vmove(c.getrow(isub+cstart-1, cstart, cend), c.getrow(j+cstart-1, cstart, cend));
1087  ap::vmove(c.getrow(j+cstart-1, cstart, cend), ctemp.getvector(cstart, cend));
1088  }
1089  }
1090  }
1091  return result;
1092  }
1093 
1094 
1095  template<unsigned int Precision>
1098  {
1100 
1101 
1102  if( b>=0 )
1103  {
1104  result = amp::abs<Precision>(a);
1105  }
1106  else
1107  {
1108  result = -amp::abs<Precision>(a);
1109  }
1110  return result;
1111  }
1112 
1113 
1114  template<unsigned int Precision>
1118  amp::ampf<Precision>& ssmin,
1119  amp::ampf<Precision>& ssmax)
1120  {
1126  amp::ampf<Precision> fhmn;
1127  amp::ampf<Precision> fhmx;
1130 
1131 
1132  fa = amp::abs<Precision>(f);
1133  ga = amp::abs<Precision>(g);
1134  ha = amp::abs<Precision>(h);
1135  fhmn = amp::minimum<Precision>(fa, ha);
1136  fhmx = amp::maximum<Precision>(fa, ha);
1137  if( fhmn==0 )
1138  {
1139  ssmin = 0;
1140  if( fhmx==0 )
1141  {
1142  ssmax = ga;
1143  }
1144  else
1145  {
1146  ssmax = amp::maximum<Precision>(fhmx, ga)*amp::sqrt<Precision>(1+amp::sqr<Precision>(amp::minimum<Precision>(fhmx, ga)/amp::maximum<Precision>(fhmx, ga)));
1147  }
1148  }
1149  else
1150  {
1151  if( ga<fhmx )
1152  {
1153  aas = 1+fhmn/fhmx;
1154  at = (fhmx-fhmn)/fhmx;
1155  au = amp::sqr<Precision>(ga/fhmx);
1156  c = 2/(amp::sqrt<Precision>(aas*aas+au)+amp::sqrt<Precision>(at*at+au));
1157  ssmin = fhmn*c;
1158  ssmax = fhmx/c;
1159  }
1160  else
1161  {
1162  au = fhmx/ga;
1163  if( au==0 )
1164  {
1165 
1166  //
1167  // Avoid possible harmful underflow if exponent range
1168  // asymmetric (true SSMIN may not underflow even if
1169  // AU underflows)
1170  //
1171  ssmin = fhmn*fhmx/ga;
1172  ssmax = ga;
1173  }
1174  else
1175  {
1176  aas = 1+fhmn/fhmx;
1177  at = (fhmx-fhmn)/fhmx;
1178  c = 1/(amp::sqrt<Precision>(1+amp::sqr<Precision>(aas*au))+amp::sqrt<Precision>(1+amp::sqr<Precision>(at*au)));
1179  ssmin = fhmn*c*au;
1180  ssmin = ssmin+ssmin;
1181  ssmax = ga/(c+c);
1182  }
1183  }
1184  }
1185  }
1186 
1187 
1188  template<unsigned int Precision>
1192  amp::ampf<Precision>& ssmin,
1193  amp::ampf<Precision>& ssmax,
1194  amp::ampf<Precision>& snr,
1195  amp::ampf<Precision>& csr,
1196  amp::ampf<Precision>& snl,
1197  amp::ampf<Precision>& csl)
1198  {
1199  bool gasmal;
1200  bool swp;
1201  int pmax;
1220  amp::ampf<Precision> temp;
1221  amp::ampf<Precision> tsign;
1224 
1225 
1226  ft = f;
1227  fa = amp::abs<Precision>(ft);
1228  ht = h;
1229  ha = amp::abs<Precision>(h);
1230 
1231  //
1232  // PMAX points to the maximum absolute element of matrix
1233  // PMAX = 1 if F largest in absolute values
1234  // PMAX = 2 if G largest in absolute values
1235  // PMAX = 3 if H largest in absolute values
1236  //
1237  pmax = 1;
1238  swp = ha>fa;
1239  if( swp )
1240  {
1241 
1242  //
1243  // Now FA .ge. HA
1244  //
1245  pmax = 3;
1246  temp = ft;
1247  ft = ht;
1248  ht = temp;
1249  temp = fa;
1250  fa = ha;
1251  ha = temp;
1252  }
1253  gt = g;
1254  ga = amp::abs<Precision>(gt);
1255  if( ga==0 )
1256  {
1257 
1258  //
1259  // Diagonal matrix
1260  //
1261  ssmin = ha;
1262  ssmax = fa;
1263  clt = 1;
1264  crt = 1;
1265  slt = 0;
1266  srt = 0;
1267  }
1268  else
1269  {
1270  gasmal = true;
1271  if( ga>fa )
1272  {
1273  pmax = 2;
1275  {
1276 
1277  //
1278  // Case of very large GA
1279  //
1280  gasmal = false;
1281  ssmax = ga;
1282  if( ha>1 )
1283  {
1284  v = ga/ha;
1285  ssmin = fa/v;
1286  }
1287  else
1288  {
1289  v = fa/ga;
1290  ssmin = v*ha;
1291  }
1292  clt = 1;
1293  slt = ht/gt;
1294  srt = 1;
1295  crt = ft/gt;
1296  }
1297  }
1298  if( gasmal )
1299  {
1300 
1301  //
1302  // Normal case
1303  //
1304  d = fa-ha;
1305  if( d==fa )
1306  {
1307  l = 1;
1308  }
1309  else
1310  {
1311  l = d/fa;
1312  }
1313  m = gt/ft;
1314  t = 2-l;
1315  mm = m*m;
1316  tt = t*t;
1317  s = amp::sqrt<Precision>(tt+mm);
1318  if( l==0 )
1319  {
1320  r = amp::abs<Precision>(m);
1321  }
1322  else
1323  {
1324  r = amp::sqrt<Precision>(l*l+mm);
1325  }
1326  a = amp::ampf<Precision>("0.5")*(s+r);
1327  ssmin = ha/a;
1328  ssmax = fa*a;
1329  if( mm==0 )
1330  {
1331 
1332  //
1333  // Note that M is very tiny
1334  //
1335  if( l==0 )
1336  {
1337  t = extsignbdsqr<Precision>(2, ft)*extsignbdsqr<Precision>(1, gt);
1338  }
1339  else
1340  {
1341  t = gt/extsignbdsqr<Precision>(d, ft)+m/t;
1342  }
1343  }
1344  else
1345  {
1346  t = (m/(s+t)+m/(r+l))*(1+a);
1347  }
1348  l = amp::sqrt<Precision>(t*t+4);
1349  crt = 2/l;
1350  srt = t/l;
1351  clt = (crt+srt*m)/a;
1352  v = ht/ft;
1353  slt = v*srt/a;
1354  }
1355  }
1356  if( swp )
1357  {
1358  csl = srt;
1359  snl = crt;
1360  csr = slt;
1361  snr = clt;
1362  }
1363  else
1364  {
1365  csl = clt;
1366  snl = slt;
1367  csr = crt;
1368  snr = srt;
1369  }
1370 
1371  //
1372  // Correct signs of SSMAX and SSMIN
1373  //
1374  if( pmax==1 )
1375  {
1376  tsign = extsignbdsqr<Precision>(1, csr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, f);
1377  }
1378  if( pmax==2 )
1379  {
1380  tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, csl)*extsignbdsqr<Precision>(1, g);
1381  }
1382  if( pmax==3 )
1383  {
1384  tsign = extsignbdsqr<Precision>(1, snr)*extsignbdsqr<Precision>(1, snl)*extsignbdsqr<Precision>(1, h);
1385  }
1386  ssmax = extsignbdsqr<Precision>(ssmax, tsign);
1387  ssmin = extsignbdsqr<Precision>(ssmin, tsign*extsignbdsqr<Precision>(1, f)*extsignbdsqr<Precision>(1, h));
1388  }
1389 } // namespace
1390 
1391 #endif
const CanonicalForm int s
Definition: facAbsFact.cc:55
int j
Definition: facHensel.cc:105
static const ampf getAlgoPascalMinNumber()
Definition: amp.h:583
void mu(int **points, int sizePoints)
void vsub(raw_vector< T > vdst, const_raw_vector< T > vsrc)
Definition: ap.h:533
static const ampf getAlgoPascalEpsilon()
Definition: amp.h:566
CFFListIterator iter
Definition: facAbsBiFact.cc:54
Definition: bdsvd.h:45
g
Definition: cfModGcd.cc:4031
CanonicalForm b
Definition: cfModGcd.cc:4044
bool bidiagonalsvddecompositioninternal(ap::template_1d_array< amp::ampf< Precision > > &d, ap::template_1d_array< amp::ampf< Precision > > e, int n, bool isupper, bool isfractionalaccuracyrequired, ap::template_2d_array< amp::ampf< Precision > > &u, int ustart, int nru, ap::template_2d_array< amp::ampf< Precision > > &c, int cstart, int ncc, ap::template_2d_array< amp::ampf< Precision > > &vt, int vstart, int ncvt)
Definition: bdsvd.h:253
bool bidiagonalsvddecomposition(ap::template_1d_array< amp::ampf< Precision > > &d, ap::template_1d_array< amp::ampf< Precision > > e, int n, bool isupper, bool isfractionalaccuracyrequired, ap::template_2d_array< amp::ampf< Precision > > &u, int nru, ap::template_2d_array< amp::ampf< Precision > > &c, int ncc, ap::template_2d_array< amp::ampf< Precision > > &vt, int ncvt)
Definition: bdsvd.h:229
amp::ampf< Precision > extsignbdsqr(amp::ampf< Precision > a, amp::ampf< Precision > b)
Definition: bdsvd.h:1096
BOOLEAN fa(leftv res, leftv args)
Definition: cohomo.cc:3007
void vmul(raw_vector< T > vdst, T2 alpha)
Definition: ap.h:603
void vadd(raw_vector< T > vdst, const_raw_vector< T > vsrc)
Definition: ap.h:413
int m
Definition: cfEzgcd.cc:121
FILE * f
Definition: checklibs.c:9
int i
Definition: cfEzgcd.cc:125
int maxint(int m1, int m2)
Definition: ap.cpp:162
void setbounds(int iLow, int iHigh)
Definition: ap.h:735
const Variable & v
< [in] a sqrfree bivariate poly
Definition: facBivar.h:37
void svdv2x2(amp::ampf< Precision > f, amp::ampf< Precision > g, amp::ampf< Precision > h, amp::ampf< Precision > &ssmin, amp::ampf< Precision > &ssmax, amp::ampf< Precision > &snr, amp::ampf< Precision > &csr, amp::ampf< Precision > &snl, amp::ampf< Precision > &csl)
Definition: bdsvd.h:1189
void svd2x2(amp::ampf< Precision > f, amp::ampf< Precision > g, amp::ampf< Precision > h, amp::ampf< Precision > &ssmin, amp::ampf< Precision > &ssmax)
Definition: bdsvd.h:1115
raw_vector< T > getvector(int iStart, int iEnd)
Definition: ap.h:776
bool rmatrixbdsvd(ap::template_1d_array< amp::ampf< Precision > > &d, ap::template_1d_array< amp::ampf< Precision > > e, int n, bool isupper, bool isfractionalaccuracyrequired, ap::template_2d_array< amp::ampf< Precision > > &u, int nru, ap::template_2d_array< amp::ampf< Precision > > &c, int ncc, ap::template_2d_array< amp::ampf< Precision > > &vt, int ncvt)
Definition: bdsvd.h:186
Definition: amp.h:82
static Poly * h
Definition: janet.cc:972
int l
Definition: cfEzgcd.cc:93
return result
Definition: facAbsBiFact.cc:76
void vmove(raw_vector< T > vdst, const_raw_vector< T > vsrc)
Definition: ap.h:237