TooN 2.1
|
00001 #ifndef TOON_INCLUDE_DERIVATIVES_NUMERICAL_H 00002 #define TOON_INCLUDE_DERIVATIVES_NUMERICAL_H 00003 00004 #include <TooN/TooN.h> 00005 #include <vector> 00006 #include <cmath> 00007 00008 using namespace std; 00009 using namespace TooN; 00010 00011 namespace TooN{ 00012 namespace Internal{ 00013 ///@internal 00014 ///@brief Implementation of Ridder's Extrapolation to zero. 00015 ///The function input starts from 1. 00016 ///@param f Functor to extrapolate to zero. 00017 ///@ingroup gInternal 00018 template<class F, class Precision> std::pair<Precision, Precision> extrapolate_to_zero(F& f) 00019 { 00020 using std::isfinite; 00021 using std::max; 00022 using std::isnan; 00023 //Use points at c^0, c^-1, ... to extrapolate towards zero. 00024 const Precision c = 1.1, t = 2; 00025 00026 static const int Npts = 400; 00027 00028 /* Neville's table is: 00029 x_0 y_0 P_0 00030 P_{01} 00031 x_1 y_1 P_1 P_{012} 00032 P_{12} P_{0123} 00033 x_2 y_2 P_2 P_{123} 00034 P_{23} 00035 x_3 y_3 P_3 00036 00037 In Matrix form, this is rearranged as: 00038 00039 00040 x_0 y_0 P_0 00041 00042 x_1 y_1 P_1 P_{01} 00043 00044 x_2 y_2 P_2 P_{12} P_{012} 00045 00046 x_3 y_3 P_3 P_{23} P_{123} P_{0123} 00047 00048 This is rewritten further as: 00049 00050 | 0 1 2 3 00051 _____|___________________________________________________ 00052 0 | x_0 y_0 P^00 00053 | 00054 1 | x_1 y_1 P^10 P^11 00055 | 00056 2 | x_2 y_2 P^10 P^21 P^22 00057 | 00058 3 | x_3 y_3 P^10 P^31 P^23 P^33 00059 00060 So, P^ij == P_{j-i...j} 00061 00062 Finally, only the current and previous row are required. This reduces 00063 the memory cost from quadratic to linear. The naming scheme is: 00064 00065 P[i][j] -> P_i[j] 00066 P[i-1][j] -> P_i_1[j] 00067 */ 00068 00069 vector<Precision> P_i(1), P_i_1; 00070 00071 Precision best_err = HUGE_VAL; 00072 Precision best_point=HUGE_VAL; 00073 00074 //The first tranche of points might be bad. 00075 //Don't quit while no good points have ever happened. 00076 bool ever_ok=0; 00077 00078 //Compute f(x) as x goes from 1 towards zero and extrapolate to 0 00079 Precision x=1; 00080 for(int i=0; i < Npts; i++) 00081 { 00082 swap(P_i, P_i_1); 00083 P_i.resize(i+2); 00084 00085 00086 //P[i][0] = c^ -i; 00087 P_i[0] = f(x); 00088 00089 x /= c; 00090 00091 //Compute the extrapolations 00092 //cj id c^j 00093 Precision cj = 1; 00094 bool better=0; //Did we get better? 00095 bool any_ok=0; 00096 for(int j=1; j <= i; j++) 00097 { 00098 cj *= c; 00099 //We have (from above) n = i and k = n - j 00100 //n-k = i - (n - j) = i - i + j = j 00101 //Therefore c^(n-k) is just c^j 00102 00103 P_i[j] = (cj * P_i[j-1] - P_i_1[j-1]) / (cj - 1); 00104 00105 if(any_ok || isfinite(P_i[j])) 00106 ever_ok = 1; 00107 00108 //Compute the difference between the current point (high order) 00109 //and the corresponding lower order point at the current step size 00110 Precision err1 = abs(P_i[j] - P_i[j-1]); 00111 00112 //Compute the difference between two consecutive points at the 00113 //corresponding lower order point at the larger stepsize 00114 Precision err2 = abs(P_i[j] - P_i_1[j-1]); 00115 00116 //The error is the larger of these. 00117 Precision err = max(err1, err2); 00118 00119 if(err < best_err && isfinite(err)) 00120 { 00121 best_err = err; 00122 best_point = P_i[j]; 00123 better=1; 00124 } 00125 } 00126 00127 using namespace std; 00128 //If the highest order point got worse, or went off the rails, 00129 //and some good points have been seen, then break. 00130 if(ever_ok && !better && i > 0 && (abs(P_i[i] - P_i_1[i-1]) > t * best_err|| isnan(P_i[i]))) 00131 break; 00132 } 00133 00134 return std::make_pair(best_point, best_err); 00135 } 00136 00137 ///@internal 00138 ///@brief Functor wrapper for computing finite differences along an axis. 00139 ///@ingroup gInternal 00140 template<class Functor, class Precision, int Size, class Base> struct CentralDifferenceGradient 00141 { 00142 const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences 00143 Vector<Size, Precision> x; ///< Local copy of v 00144 const Functor& f; ///< Functor to evaluate 00145 int i; ///< Index to difference along 00146 00147 CentralDifferenceGradient(const Vector<Size, Precision, Base>& v_, const Functor& f_) 00148 :v(v_),x(v),f(f_),i(0) 00149 {} 00150 00151 ///Compute central difference. 00152 Precision operator()(Precision hh) 00153 { 00154 using std::max; 00155 using std::abs; 00156 00157 //Make the step size be on the scale of the value. 00158 double h = hh * max(abs(v[i]) * 1e-3, 1e-3); 00159 00160 x[i] = v[i] - h; 00161 double f1 = f(x); 00162 x[i] = v[i] + h; 00163 double f2 = f(x); 00164 x[i] = v[i]; 00165 00166 double d = (f2 - f1) / (2*h); 00167 return d; 00168 } 00169 }; 00170 00171 ///@internal 00172 ///@brief Functor wrapper for computing finite difference second derivatives along an axis. 00173 ///@ingroup gInternal 00174 template<class Functor, class Precision, int Size, class Base> struct CentralDifferenceSecond 00175 { 00176 const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences 00177 Vector<Size, Precision> x; ///< Local copy of v 00178 const Functor& f; ///< Functor to evaluate 00179 int i; ///< Index to difference along 00180 const Precision central; ///<Central point. 00181 00182 CentralDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_) 00183 :v(v_),x(v),f(f_),i(0),central(f(x)) 00184 {} 00185 00186 ///Compute central difference. 00187 Precision operator()(Precision hh) 00188 { 00189 using std::max; 00190 using std::abs; 00191 00192 //Make the step size be on the scale of the value. 00193 double h = hh * max(abs(v[i]) * 1e-3, 1e-3); 00194 00195 x[i] = v[i] - h; 00196 double f1 = f(x); 00197 00198 x[i] = v[i] + h; 00199 double f2 = f(x); 00200 x[i] = v[i]; 00201 00202 double d = (f2 - 2*central + f1) / (h*h); 00203 return d; 00204 } 00205 }; 00206 00207 ///@internal 00208 ///@brief Functor wrapper for computing finite difference cross derivatives along a pair of axes. 00209 ///@ingroup gInternal 00210 template<class Functor, class Precision, int Size, class Base> struct CentralCrossDifferenceSecond 00211 { 00212 const Vector<Size, Precision, Base>& v; ///< Point about which to compute differences 00213 Vector<Size, Precision> x; ///< Local copy of v 00214 const Functor& f; ///< Functor to evaluate 00215 int i; ///< Index to difference along 00216 int j; ///< Index to difference along 00217 00218 CentralCrossDifferenceSecond(const Vector<Size, Precision, Base>& v_, const Functor& f_) 00219 :v(v_),x(v),f(f_),i(0),j(0) 00220 {} 00221 00222 ///Compute central difference. 00223 Precision operator()(Precision hh) 00224 { 00225 using std::max; 00226 using std::abs; 00227 00228 //Make the step size be on the scale of the value. 00229 double h = hh * max(abs(v[i]) * 1e-3, 1e-3); 00230 00231 x[i] = v[i] + h; 00232 x[j] = v[j] + h; 00233 double a = f(x); 00234 00235 x[i] = v[i] - h; 00236 x[j] = v[j] + h; 00237 double b = f(x); 00238 00239 x[i] = v[i] + h; 00240 x[j] = v[j] - h; 00241 double c = f(x); 00242 00243 00244 x[i] = v[i] - h; 00245 x[j] = v[j] - h; 00246 double d = f(x); 00247 00248 return (a-b-c+d) / (4*h*h); 00249 } 00250 }; 00251 00252 } 00253 00254 00255 /** Extrapolate a derivative to zero using Ridder's Algorithm. 00256 00257 Ridder's algorithm works by evaluating derivatives with smaller and smaller step 00258 sizes, fitting a polynomial and extrapolating its value to zero. 00259 00260 This algorithm is generally more accurate and much more reliable, but much slower than 00261 using simple finite differences. It is robust to awkward functions and does not 00262 require careful tuning of the step size, furthermore it provides an estimate of the 00263 errors. This implementation has been tuned for accuracy instead of speed. For an 00264 estimate of the errors, see also numerical_gradient_with_errors(). In general it is 00265 useful to know the errors since some functions are remarkably hard to differentiate 00266 numerically. 00267 00268 Neville's Algorithm can be used to find a point on a fitted polynomial at 00269 \f$h\f$. Taking 00270 some points \f$h_i\f$ and \f$ y_i = f(h_i)\f$, one can define a table of 00271 points on various polyomials: 00272 \f[ 00273 \begin{array}{ccccccc} 00274 h_0 & y_0 & P_0 & & & \\ 00275 & & & P_{01} & & \\ 00276 h_1 & y_1 & P_1 & & P_{012} & \\ 00277 & & & P_{12} & & P_{0123} \\ 00278 h_2 & y_2 & P_2 & & P_{123} & \\ 00279 & & & P_{23} & & \\ 00280 h_3 & y_3 & P_3 & & & \\ 00281 \end{array} 00282 \f] 00283 where \f$P_{123}\f$ is the value of a polynomial fitted to datapoints 1, 2 and 3 00284 evaluated at \f$ h\f$. The initial values are simple to evaluate: \f$P_i = y_i = 00285 f(h_i)\f$. The remaining values are determined by the recurrance: 00286 \f{equation} 00287 P_{k\cdots n} = \frac{(h - h_n)P_{k \cdots n-1} + (h_k - h)P_{k+1 \cdots n}}{h_k - h_n} 00288 \f} 00289 00290 For Ridder's algorithm, we define the extrapolation point \f$ h=0 \f$ and the 00291 sequence of points to evaluate as \f$ h_i = c^{-i} \f$ where \f$ c \f$ is some 00292 constant a little greater than 1. 00293 Substituting in to the above equation gives: 00294 \f[ 00295 P_{k \cdots n} = \frac{c^{-k} P_{k+1 \cdots n} - P_{k \cdots n-1}}{c^{n - k} - 1} 00296 \f] 00297 00298 To measure errors, when computing (for example) \f$P_{234}\f$, we compare the 00299 value to the lower order with the same step size \f$P_{34}\f$, and the lower 00300 order with a larger step size \f$P_{23}\f$. The error estimate is the largest of 00301 these. The extrapolation with the smallest error is retained. 00302 00303 Not only that, but since every value of P is used, every single subset of points 00304 is used for polynomial fitting. So, bad points for large and small \f$h\f$ do 00305 not spoil the answer. 00306 00307 It is generally assumed that as \f$h\f$ decreases, the errors decrease, then 00308 increase again. Therefore, if the current step size did not yield any 00309 improvements on the best point so far, then we terminate when the highest order 00310 point is \f$ t \f$ times worse then the previous order point. 00311 00312 The parameters are: 00313 - \f$ c = 1.1 \f$ 00314 - \f$ t = 2 \f$ 00315 - Max iterations = 400 00316 00317 \section rRef References 00318 00319 - Ridders, C. J. F, 1982, Advances in Engineering Software, 4(2) 75--76 00320 - Press, Vetterling, Teukolsky, Flannery, Numerical, 1997, Numerical Recipies in C (2nd ed.), Chapter 5.7 00321 00322 @param f Functor to differentiate 00323 @param x Point about which to differentiate. 00324 @ingroup gFunctions 00325 */ 00326 template<class F, int S, class P, class B> Vector<S, P> numerical_gradient(const F& f, const Vector<S, P, B>& x) 00327 { 00328 using namespace Internal; 00329 Vector<S> grad(x.size()); 00330 00331 CentralDifferenceGradient<F, P, S, B> d(x, f); 00332 00333 for(int i=0; i < x.size(); i++) 00334 { 00335 d.i = i; 00336 grad[i] = extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d).first; 00337 } 00338 00339 return grad; 00340 } 00341 00342 ///Compute numerical gradients with errors. 00343 ///See numerical_gradient(). 00344 ///Gradients are returned in the first row of the returned matrix. 00345 ///Errors are returned in the second row. The errors have not been scaled, so they are 00346 ///in the same range as the gradients. 00347 ///@param f Functor to differentiate 00348 ///@param x Point about which to differentiate. 00349 ///@ingroup gFunctions 00350 template<class F, int S, class P, class B> Matrix<S,2,P> numerical_gradient_with_errors(const F& f, const Vector<S, P, B>& x) 00351 { 00352 using namespace Internal; 00353 Matrix<S,2> g(x.size(), 2); 00354 00355 CentralDifferenceGradient<F, P, S, B> d(x, f); 00356 00357 for(int i=0; i < x.size(); i++) 00358 { 00359 d.i = i; 00360 pair<double, double> r= extrapolate_to_zero<CentralDifferenceGradient<F, P, S, B>, P>(d); 00361 g[i][0] = r.first; 00362 g[i][1] = r.second; 00363 } 00364 00365 return g; 00366 } 00367 00368 00369 ///Compute the numerical Hessian using central differences and Ridder's method: 00370 ///\f[ 00371 /// \frac{\partial^2 f}{\partial x^2} \approx \frac{f(x-h) - 2f(x) + f(x+h)}{h^2} 00372 ///\f] 00373 ///\f[ 00374 /// \frac{\partial^2 f}{\partial x\partial y} \approx \frac{f(x+h, y+h) - f(x-h,y+h) - f(x+h, y-h) + f(x-h, y-h)}{4h^2} 00375 ///\f] 00376 ///See numerical_gradient(). 00377 ///@param f Functor to double-differentiate 00378 ///@param x Point about which to double-differentiate. 00379 ///@ingroup gFunctions 00380 template<class F, int S, class P, class B> pair<Matrix<S, S, P>, Matrix<S, S, P> > numerical_hessian_with_errors(const F& f, const Vector<S, P, B>& x) 00381 { 00382 using namespace Internal; 00383 Matrix<S> hess(x.size(), x.size()); 00384 Matrix<S> errors(x.size(), x.size()); 00385 00386 CentralDifferenceSecond<F, P, S, B> curv(x, f); 00387 CentralCrossDifferenceSecond<F, P, S, B> cross(x, f); 00388 00389 //Perform the cross differencing. 00390 00391 for(int r=0; r < x.size(); r++) 00392 for(int c=r+1; c < x.size(); c++) 00393 { 00394 cross.i = r; 00395 cross.j = c; 00396 pair<double, double> e = extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross); 00397 hess[r][c] = hess[c][r] = e.first; 00398 errors[r][c] = errors[c][r] = e.second; 00399 } 00400 00401 for(int i=0; i < x.size(); i++) 00402 { 00403 curv.i = i; 00404 pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv); 00405 hess[i][i] = e.first; 00406 errors[i][i] = e.second; 00407 } 00408 00409 return make_pair(hess, errors); 00410 } 00411 00412 ///Compute the numerical Hessian and errors. The Hessian is returned as the first 00413 ///element, and the errors as the second. 00414 ///See numerical_hessian(). 00415 ///@param f Functor to double-differentiate 00416 ///@param x Point about which to double-differentiate. 00417 ///@ingroup gFunctions 00418 template<class F, int S, class P, class B> Matrix<S, S, P> numerical_hessian(const F& f, const Vector<S, P, B>& x) 00419 { 00420 using namespace Internal; 00421 Matrix<S> hess(x.size(), x.size()); 00422 00423 CentralDifferenceSecond<F, P, S, B> curv(x, f); 00424 CentralCrossDifferenceSecond<F, P, S, B> cross(x, f); 00425 00426 //Perform the cross differencing. 00427 for(int r=0; r < x.size(); r++) 00428 for(int c=r+1; c < x.size(); c++) 00429 { 00430 cross.i = r; 00431 cross.j = c; 00432 pair<double, double> e = extrapolate_to_zero<CentralCrossDifferenceSecond<F, P, S, B>, P >(cross); 00433 hess[r][c] = hess[c][r] = e.first; 00434 } 00435 00436 for(int i=0; i < x.size(); i++) 00437 { 00438 curv.i = i; 00439 pair<double, double> e = extrapolate_to_zero<CentralDifferenceSecond<F, P, S, B>, P>(curv); 00440 hess[i][i] = e.first; 00441 } 00442 00443 return hess; 00444 } 00445 } 00446 00447 #endif 00448