Dear all, now Lanczos coefficients can be evaluated *much* faster. $ time ~/tmp/lanczos -n200 -D500 > ~/tmp/coeffs-n200-D500.txt real 0m13.247s user 0m13.197s sys 0m0.032s $ time ~/tmp/lanczos.orig -n200 -D500 > ~/tmp/coeffs-n200-D500.txt.orig real 6m4.403s user 6m2.211s sys 0m1.808s New code requires less memory: for order = 200 and digits = 500 the original program consumes ~ 980Mb (according to top(1)), while improved version needs only ~ 11Mb. The actual results are slightly different, I don't know the reason yet. --- doc/examples/lanczos.cpp | 286 +++++++++++++++++++---------------------------- 1 file changed, 116 insertions(+), 170 deletions(-) diff --git a/doc/examples/lanczos.cpp b/doc/examples/lanczos.cpp index 4dc03f1..d1dce8c 100644 --- a/doc/examples/lanczos.cpp +++ b/doc/examples/lanczos.cpp @@ -48,170 +48,117 @@ * MA 02110-1301 USA */ -#include <ginac/ginac.h> +#include <vector> +#include <cstddef> // for size_t #include <iostream> +#include <cln/io.h> +#include <cln/number.h> +#include <cln/integer.h> +#include <cln/rational.h> +#include <cln/real.h> +#include <cln/real_io.h> +#include <cln/complex.h> using namespace std; -using namespace GiNaC; - -/* - * Double factorial function. - */ -ex doublefact(int i) -{ if (i==0 || i==-1) - return 1; - if (i>0) - return i*doublefact(i-2); - cout << "function doublefact called with wrong argument" << endl; - exit(1); -} +using namespace cln; /* * Chebyshev polynomial coefficient matrix as far as is required for * the Lanczos approximation. */ -void initialize_C_vec(vector<exvector> &C, int size) -{ C.reserve(size); - for (int i=0; i<size; ++i) - C.push_back(exvector(size, 0)); - C[1][1] = 1; - C[2][2] = 1; - for (int i=3; i<size; i+=2) +void calc_chebyshev(vector<vector<cl_I> >& C, const size_t size) +{ + C.reserve(size); + for (size_t i=0; i<size; ++i) + C.push_back(vector<cl_I>(size, 0)); + C[1][1] = cl_I(1); + C[2][2] = cl_I(1); + for (size_t i=3; i<size; i+=2) C[i][1] = -C[i-2][1]; - for (int i=3; i<size; ++i) + for (size_t i=3; i<size; ++i) C[i][i] = 2*C[i-1][i-1]; - for (int j=2; j<size; ++j) - for (int i=j+2; i<size; i+=2) + for (size_t j=2; j<size; ++j) + for (size_t i=j+2; i<size; i+=2) C[i][j] = 2*C[i-1][j-1] - C[i-2][j]; } /* * The coefficients p_n(g) that occur in the Lanczos approximation. */ -ex p(int k, ex g, const vector<exvector> &C) -{ ex result = 0; - for (int a=0; a<=k; ++a) - result += 2*C[2*k+1][2*a+1]/sqrt(Pi)*doublefact(2*a-1) - *power(2*a+2*g+1, -a-ex(1)/2)*exp(a+g+ex(1)/2); +const cl_F p(const size_t k, const cl_F& g, const vector<vector<cln::cl_I> >& C) +{ + const float_format_t prec = float_format(g); + const cl_F sqrtPi = sqrt(pi(prec)); + cl_F result = cl_float(0, prec); + + result = result + + (2*C[2*k+1][1])*recip(sqrtPi)* + recip(sqrt(2*g+1))*exp(g+cl_RA(1)/2); + + for (size_t a=1; a <= k; ++a) + result = result + + (2*C[2*k+1][2*a+1]*doublefactorial(2*a-1))*recip(sqrtPi)* + recip(expt(2*cl_I(a)+2*g+1, a))*recip(sqrt(2*cl_I(a)+2*g+1))* + exp(cl_I(a)+g+cl_RA(1)/2); return result; } /* - * Check wheter x is of the form 1/(z+n) where z is given by the second - * argument and n should be a positive integer that is returned as the - * third argument. If x is not of this form, false is returned. + * Calculate coefficients that occurs in the expression for the + * Lanczos approximation of order n. */ -bool is_z_pole(const ex &x, const ex &z, ex &n) -{ if (!is_a<power>(x)) - return false; - if (x.op(1) != -1) - return false; - ex denom = x.op(0); - if (!is_a<add>(denom)) - return false; - if (denom.nops() != 2) - return false; - if (denom.op(0) != z) - return false; - if (!denom.op(1).info(info_flags::posint)) - return false; - n = denom.op(1); - return true; -} +void calc_lanczos_coeffs(vector<cl_F>& lanc, const cln::cl_F& g) +{ + const size_t n = lanc.size(); + vector<vector<cln::cl_I> > C; + calc_chebyshev(C, 2*n+2); + + // \Pi_{i=1}^n (z-i+1)/(z+i) = \Pi_{i=1}^n (1 - (2i-1)/(z+i)) + // Such a product can be rewritten as multivariate polynomial \in + // Q[1/(z+1),...1/(z+n)] of degree 1. To store coefficients of this + // polynomial we use vector<cl_I>, so the set of such polynomials is + // stored as + vector<vector<cln::cl_I> > fractions(n); + + // xi = 1/(z+i) + fractions[0] = vector<cl_I>(1); + fractions[0][0] = cl_I(1); + fractions[1] = fractions[0]; + fractions[1].push_back(cl_I(-1)); // 1 - 1/(z+1) -/* - * Simplify the expression x by applying the rules - * - * 1/(z+n)/(z+m) = 1/(n-m)/(z+m) - 1/(n-m)/(z+n); - * z^m/(z+n) = z^(m-1) - n*z^(m-1)/(z+n) - * - * as often as possible, where z is given as an argument to the function - * and n and m are arbitrary positive numbers. - */ -ex poles_simplify(const ex &x, const ex &z) -{ if (is_a<mul>(x)) - { for (int i=0; i<x.nops(); ++i) - { ex arg1; - if (!is_z_pole(x.op(i), z, arg1)) - continue; - for (int j=i+1; j<x.nops(); ++j) - { ex arg2; - if (!is_z_pole(x.op(j), z, arg2)) - continue; - ex result = x/x.op(i)/(arg1-arg2)-x/x.op(j)/(arg1-arg2); - result = poles_simplify(result, z); - return result; - } - } - ex result = expand(x); - if (is_a<add>(result)) - return poles_simplify(result, z); - for (int i=0; i<x.nops(); ++i) - { ex arg1; - if (!is_z_pole(x.op(i), z, arg1)) - continue; - for (int j=0; j<x.nops(); ++j) - { ex expon = 0; - if (is_a<power>(x.op(j)) && x.op(j).op(0)==z - && x.op(j).op(1).info(info_flags::posint)) - expon = x.op(j).op(1); - if (x.op(j) == z) - expon = 1; - if (expon == 0) - continue; - ex result = x/x.op(i)/z - arg1*x/z; - result = poles_simplify(result, z); - return result; - } + for (size_t i=2; i<n; ++i) + { + // next = previous*(1 - (2*i-1)*xi) = + // previous - (2*i-1)*xi - (2i-1)*(previous-1)*xi + // Such representation is convenient since previous contains only + // only x1...x[i-1] + + fractions[i] = fractions[i-1]; + fractions[i].push_back(-cl_I(2*i-1)); // - (2*i-1)*xi + // Now add -(2i+1)*(previous-1)*xi using formula + // 1/(z+j)*1/(z+i) = 1/(i-j)*(1/(z+j) - 1/(z+i)), that is + // xj*xi = 1/(i-j)(xj - xi) + for (size_t j=1; j < i; j++) { + cl_I curr = - exquo(cl_I(2*i-1)*fractions[i-1][j], cl_I(i-j)); + fractions[i][j] = fractions[i][j] + curr; + fractions[i][i] = fractions[i][i] - curr; + // N.B. i-th polynomial has (i+1) non-zero coefficients } - return x; - } - if (is_a<add>(x)) - { pointer_to_map_function_1arg<const ex&> mf(poles_simplify, z); - return x.map(mf); } - return x; -} -/* - * Calculate the expression A_g(z) that occurs in the expression for the - * Lanczos approximation of order n. This is returned as an expression of - * the form - * - * A_g(z) = coeff[0] + coeff[1]/(z+1) + coeff[2]/(z+2) + ... - * + coeff[N-1]/(z+N-1). - */ -ex A(const ex &g, const ex &z, int n) -{ vector<exvector> C; - initialize_C_vec(C, 2*n+2); - ex result = evalf(p(0, g, C))/2; - ex fraction = 1; - for (int i=1; i<n; ++i) - { fraction *= (z-i+1)/(z+i); - fraction = poles_simplify(fraction, z); - result += evalf(p(i, g, C))*fraction; - } - result = poles_simplify(result, z); - return result; -} + vector<cl_F> p_cache(n); + for (size_t i=0; i < n; i++) + p_cache[i] = p(i, g, C); -/* - * The exvector coeffs should contain order elements and these are set to - * the coefficients that belong to the Lanczos approximation of the gamma - * function at the given order. - */ -void calc_lanczos_coeffs(exvector &coeffs, double g_val, int order) -{ symbol g("g"), z("z"); - ex result = A(g, z, order); - result = result.subs(g==g_val); - for (int i=0; i<result.nops(); ++i) - { if (is_a<numeric>(result.op(i))) - coeffs[0] = result.op(i); - else - { ex n; - is_z_pole(result.op(i).op(0), z, n); - coeffs[ex_to<numeric>(n).to_int()] = result.op(i).op(1); - } + lanc[0] = p_cache[0]/2; + + float_format_t prec = float_format(g); + // A = p(i, g, C)*fraction[i] = p(i, g, C)*F[i][j]*xj, + for (size_t j=1; j < n; ++j) { + lanc[j] = cl_float(0, prec); + lanc[0] = lanc[0] + p_cache[j]; + for (size_t i=j; i < n; i++) + lanc[j] = lanc[j] + p_cache[i]*fractions[i][j]; } } @@ -219,14 +166,19 @@ void calc_lanczos_coeffs(exvector &coeffs, double g_val, int order) * Calculate Gamma(z) using the Lanczos approximation with parameter g and * coefficients stored in the exvector coeffs. */ -ex calc_gamma(const ex &z, const ex &g, exvector &coeffs) -{ if (real_part(evalf(z)) < 0.5) - return evalf(Pi/sin(Pi*z)/calc_gamma(1-z, g, coeffs)); - ex A = coeffs[0]; - for (int i=1; i<coeffs.size(); ++i) - A += evalf(coeffs[i]/(z-1+i)); - ex result = sqrt(2*Pi)*power(z+g-ex(1)/2, z-ex(1)/2)*exp(-(z+g-ex(1)/2))*A; - return evalf(result); +const cl_N calc_gamma(const cl_N& z, const cl_F& g, const vector<cl_F>& lanc) +{ + const cl_F thePi = pi(float_format(g)); + // XXX: need to check if z is floating-point and precision of z + // matches one of g + if (realpart(z) < 0.5) + return (thePi/sin(thePi*z))/calc_gamma(1-z, g, lanc); + cl_N A = lanc[0]; + for (size_t i=1; i < lanc.size(); ++i) + A = A + lanc[i]/(z-1+i); + cl_N result = sqrt(2*thePi)*expt(z+g-cl_RA(1)/2, z-cl_RA(1)/2)* + exp(-(z+g-cl_RA(1)/2))*A; + return result; } void usage(char *progname) @@ -234,13 +186,13 @@ void usage(char *progname) exit(0); } -void read_options(int argc, char**argv, int &order) +void read_options(int argc, char**argv, int &order, int& digits) { int c; while((c=getopt(argc,argv,"n:D:"))!=-1) { if(c=='n') order = atoi(optarg); else if (c=='D') - Digits = atoi(optarg); + digits = atoi(optarg); else usage(argv[0]); } @@ -248,45 +200,39 @@ void read_options(int argc, char**argv, int &order) usage(argv[0]); } -/* - * Do something stupid to the number x in order to round it to the current - * numer of digits. Does somebody know a better way to do this? In that case, - * please fix me. - */ -ex round(const ex &x) -{ return ex_to<numeric>(x).add(numeric("0.0")); -} - int main(int argc, char *argv[]) { /* * Handle command line options. */ int order = 10; - read_options(argc, argv, order); + int digits_ini = 17; + read_options(argc, argv, order, digits_ini); + float_format_t prec = float_format(digits_ini); + const cl_F thePi = pi(prec); /* * Calculate coefficients. */ - const int g_val = order; - exvector coeffs(order); - calc_lanczos_coeffs(coeffs, g_val, order); + const cl_F g_val = cl_float(order, prec); + vector<cl_F> coeffs(order); + calc_lanczos_coeffs(coeffs, g_val); /* * Determine the accuracy by comparing Gamma(1/2) to sqrt(Pi). */ - ex gamma_half = calc_gamma(ex(1)/2, g_val, coeffs); - ex digits = -log(abs((gamma_half - sqrt(Pi)))/sqrt(Pi))/log(10.0); - digits = evalf(digits); - int i_digits = int(ex_to<numeric>(digits).to_double()); - if (digits < 1) + cl_N gamma_half = calc_gamma(cl_float(cl_RA(1)/2, prec), g_val, coeffs); + cl_F digits = (ln(thePi)/2 - ln(abs(gamma_half - sqrt(thePi))))/ln(cl_float(10, prec)); + int i_digits = cl_I_to_int(floor1(digits)); + if (digits < cl_I(1)) cout << "Forget it, this is waaaaaaay too inaccurate." << endl; else cout << "Reliable digits: " << i_digits << endl; /* * Print the coefficients. */ - Digits = i_digits + 10; // Don't print too many spurious digits. - for (int i=0; i<coeffs.size(); ++i) - cout << "coeffs_" << order << "[" << i << "] = numeric(\"" - << round(coeffs[i]) << "\");"<< endl; + for (size_t i=0; i<coeffs.size(); ++i) { + cout << "coeffs_" << order << "[" << i << "] = \""; + cout << coeffs[i]; + cout << "\";" << endl; + } return 0; } -- 1.4.4.4 Best regards, Alexei -- All science is either physics or stamp collecting.