/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2013 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <ctype.h>
#include <float.h>
#include "Error.h"
#include "pdbMalloc.h"
#include "pdbStats.h"
#include "pdbUtils.h"
#include "pdbIO.h"
#include "Cds.h"
#include "MultiPose.h"
#include "PDBCds.h"
#include "distfit.h"
#include "DLTmath.h"
#include "RandCds.h"
#include "ProcGSLSVD.h"
#include "DLTmath.h"
#include "libdistfit/vonmises_dist.h"
#include <gsl/gsl_math.h>
#include <gsl/gsl_sf_hyperg.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>


/* Forward declaration; the definition is not in the part of the file shown
   here (NOTE(review): presumably the Gibbs log-likelihood -- confirm). */
static double
CalcLogLGibbs(CdsArray *cdsA);

/* Forward declaration of the single-step Metropolis scale update that is
   defined further down in this file. */
double
scale_met3(const double n, const double gamma, const double phi, const double x, const gsl_rng *r2, double loc, double width);


double
ScaleMax(const double n, const double gamma, const double phi)
{
    return((gamma + sqrt(gamma*gamma + 4.0*phi*(n-1.0)))/(2.0*phi));
}


/* Parabolic cylinder function D_n(z)
   See Abramowitz and Stegun p 510, 13.6.36
   also see Ch 19, 19.3, 19.5.1, etc.
   The parabolic cylinder function is a type of confluent hypergeometric function,
   defined in Gradshteyn and Ryzhik p 1028, section 9.24-9.25.

   http://mathworld.wolfram.com/ParabolicCylinderFunction.html

   gsl_sf_hyperg_U calculates the hypergeometric function of the 2nd kind:

   http://mathworld.wolfram.com/ConfluentHypergeometricFunctionoftheSecondKind.html
*/
/* NB:  THIS IS BROKEN.  gsl_sf_hyperg_U doesn't work for some large arguments -- I'm unsure exactly which,
   but it sucks and makes the fxn useless for me. 
   It does, however, successfully reproduce ALL the tables in A&S (pp 702-710). */
double
CalcDnz(const double n, const double z)
{
    /* D_n(z) = 2^(n/2) * exp(-z^2/4) * U(-n/2, 1/2, z^2/2) */
    const double    powerterm = pow(2.0, 0.5 * n);
    const double    gaussterm = exp(-0.25 * z*z );
    const double    hypergU = gsl_sf_hyperg_U(-0.5 * n, 0.5, 0.5 * z*z);

    return(powerterm * gaussterm * hypergU);
}


/* Evaluates CalcDnz at order -a - 1/2, i.e. the parabolic cylinder function
   written in U(a, x) form.
   NB:  This is broken for large arguments, because CalcDnz is broken */
double
CalcUax(const double a, const double x)
{
    const double    order = -a-0.5;

    return(CalcDnz(order, x));
}


double
CalcHalfNormChiLik(const double x, const double n, const double gamma, const double phi)
{
    if (x < DBL_MIN)
    {
        return(0.0);
    }
    else
    {
         double logp = (n-1.0) * log(x) - (0.5 * phi * x * x) + (gamma * x);
         return(exp(logp));
//        return(pow(x, n-1.0) * exp((-0.5 * phi * x * x) + (gamma * x)));
    }
}


/* Calculates the normalizing constant for the scale factor PDF:

   P(x) \propto  x^(n-1) e^-(phi/2 x^2 - gamma x)

   The integral for this can be found in Gradshteyn and Ryzhik,
   p. 365, formula 3.462(1).

   The value returned is the reciprocal of

       phi^(-n/2) * exp(gamma^2 / (4 phi)) * Gamma(n) * D_{-n}(-gamma / sqrt(phi))

   with D evaluated by CalcDnz.  (An alternative based on
   CalcUab_large_a(n-0.5, -gamma / sqrt(phi)) was tried here previously.)
*/
double
CalcNormConst(const double n, const double gamma, const double phi)
{
    const double    scalepart = pow(phi, -0.5 * n) * exp(gamma*gamma / (4.0 * phi));
    const double    specialpart = tgamma(n) * CalcDnz(-n, -gamma / sqrt(phi));

    return(1.0/(scalepart * specialpart));
}


/* Normalizing constant for the same x^(n-1) exp(-phi/2 x^2 + gamma x) kernel,
   expressed with confluent hypergeometric 1F1 functions (gsl_sf_hyperg_1F1)
   instead of the parabolic cylinder function.  Returns the reciprocal of the
   integral expression. */
double
CalcNormConstMm(const double n, const double gamma, const double phi)
{
    const double    hgarg = 0.5*gamma*gamma/phi;
    const double    prefactor = pow(2.0, 0.5*(n-3.0)) * pow(phi,-0.5*(n+1.0));
    const double    evenpart = sqrt(2.0*phi) * tgamma(0.5*n) * gsl_sf_hyperg_1F1(0.5*n, 0.5, hgarg);
    const double    oddpart = 2.0 * gamma * tgamma(0.5*(n+1.0)) * gsl_sf_hyperg_1F1(0.5*(n+1.0), 1.5, hgarg);

    return(1.0/(prefactor * (evenpart + oddpart)));
}


/* Normalized density: the unnormalized kernel from CalcHalfNormChiLik times
   the constant from CalcNormConstMm. */
double
CalcHalfNormChi(const double x, const double n, const double gamma, const double phi)
{
    const double    kernel = CalcHalfNormChiLik(x, n, gamma, phi);
    const double    normconst = CalcNormConstMm(n, gamma, phi);

    return(kernel * normconst);
}


/* Returns

       (n+1) * U(n+1/2, -gamma/sqrt(phi)) / (sqrt(phi) * U(n-1/2, -gamma/sqrt(phi)))

   with U evaluated by CalcUax (equivalently D_{-n-1} / D_{-n} via CalcDnz).
   NOTE(review): the leading factor is (n+1.0); confirm it matches the exponent
   convention of the density this expectation is meant for. */
double
ExpectScale(const double n, const double gamma, const double phi)
{
    const double    rootphi = sqrt(phi);
    const double    arg = -gamma/rootphi;
    const double    upper = CalcUax(n+0.5, arg);
    const double    lower = CalcUax(n-0.5, arg);

    return((n+1.0) * upper/(rootphi*lower));
}


/* Overwrites cdsA->avecds with the unweighted arithmetic mean, coordinate by
   coordinate, of the cnum structures in cdsA->cds. */
static void
AveCdsGibbs(CdsArray *cdsA)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const double    recip = 1.0 / (double) nstruct;
    Cds           **members = cdsA->cds;
    double         *meanx = cdsA->avecds->x,
                   *meany = cdsA->avecds->y,
                   *meanz = cdsA->avecds->z;
    double          sumx, sumy, sumz;
    int             atom, s;

    for (atom = 0; atom < natoms; ++atom)
    {
        sumx = sumy = sumz = 0.0;

        /* structures are summed in index order */
        for (s = 0; s < nstruct; ++s)
        {
            sumx += members[s]->x[atom];
            sumy += members[s]->y[atom];
            sumz += members[s]->z[atom];
        }

        meanx[atom] = sumx * recip;
        meany[atom] = sumy * recip;
        meanz[atom] = sumz * recip;
    }
}


static void
RotMatToQuaternion(const double **rot, double *quat)
{
    double              trace, s, w, x, y, z;

    /* convert to quaternion */
    trace = rot[0][0] + rot[1][1] + rot[2][2] + 1.0; 

    if( trace > FLT_EPSILON )
    {
        s = 0.5 / sqrt(trace);
        w = 0.25 / s;
        x = ( rot[2][1] - rot[1][2] ) * s;
        y = ( rot[0][2] - rot[2][0] ) * s;
        z = ( rot[1][0] - rot[0][1] ) * s;
    }
    else
    {
        if (rot[0][0] > rot[1][1] && rot[0][0] > rot[2][2])
        {
            s = 2.0 * sqrt( 1.0 + rot[0][0] - rot[1][1] - rot[2][2]);
            x = 0.25 * s;
            y = (rot[0][1] + rot[1][0] ) / s;
            z = (rot[0][2] + rot[2][0] ) / s;
            w = (rot[1][2] - rot[2][1] ) / s;
        }
        else if (rot[1][1] > rot[2][2])
        {
            s = 2.0 * sqrt(1.0 + rot[1][1] - rot[0][0] - rot[2][2]);
            x = (rot[0][1] + rot[1][0] ) / s;
            y = 0.25 * s;
            z = (rot[1][2] + rot[2][1] ) / s;
            w = (rot[0][2] - rot[2][0] ) / s;
        }
        else
        {
            s = 2.0 * sqrt(1.0 + rot[2][2] - rot[0][0] - rot[1][1]);
            x = (rot[0][2] + rot[2][0] ) / s;
            y = (rot[1][2] + rot[2][1] ) / s;
            z = 0.25 * s;
            w = (rot[0][1] - rot[1][0] ) / s;
        }
    }

    quat[0] = -w;
    quat[1] = x;
    quat[2] = y;
    quat[3] = z;
}


/* Fills cds->innerprod2 (allocated with MatAlloc(3, 3) on first use) with the
   weighted 3x3 inner product  X' W X  of the coordinates:

       (3 x N)(N x 3) = (3 x 3)

   and prints its trace to stdout.  The zeroing memset assumes the matrix
   storage is one contiguous run of 9 doubles -- TODO confirm for MatAlloc. */
static void
CdsInnProd2(Cds *cds, const double *wts)
{
    const double   *px = (const double *) cds->x,
                   *py = (const double *) cds->y,
                   *pz = (const double *) cds->z;
    double        **ip = cds->innerprod2;
    double          sxx = 0.0, syy = 0.0, szz = 0.0,
                    sxy = 0.0, sxz = 0.0, syz = 0.0;
    double          w, cx, cy, cz;
    int             atom;

    if (ip == NULL)
        ip = cds->innerprod2 = MatAlloc(3, 3);

    memset(&ip[0][0], 0, 9 * sizeof(double));

    for (atom = 0; atom < cds->vlen; ++atom)
    {
        w = wts[atom];
        cx = px[atom];
        cy = py[atom];
        cz = pz[atom];

        sxx += (cx * cx) * w;
        syy += (cy * cy) * w;
        szz += (cz * cz) * w;
        sxy += (cx * cy) * w;
        sxz += (cx * cz) * w;
        syz += (cy * cz) * w;
    }

    /* diagonal, then the symmetric off-diagonal pairs */
    ip[0][0] = sxx;
    ip[1][1] = syy;
    ip[2][2] = szz;
    ip[0][1] = ip[1][0] = sxy;
    ip[0][2] = ip[2][0] = sxz;
    ip[1][2] = ip[2][1] = syz;

    printf("tr(X'X) = % e\n", ip[0][0] + ip[1][1] + ip[2][2]);
}


/* Computes the weighted 3x3 inner-product matrix of cds (CdsInnProd2), passes
   it to jacobi3_cyc with tolerance 1e-8 and then to EigenSort3b, leaving the
   result in rotmat.  If the determinant of rotmat is negative, the first row
   whose diagonal element is negative is sign-flipped.

   cds    -- coordinates; cds->innerprod2 is (re)computed as a side effect
   rotmat -- 3x3 output matrix
   wts    -- per-atom weights handed to CdsInnProd2

   The eigenvalue scratch space is an automatic array: the previous
   heap allocation was never checked for failure, so an out-of-memory
   condition would have been dereferenced as NULL.

   NOTE(review): when det < 0 but no diagonal element is negative, nothing is
   flipped and rotmat keeps its negative determinant. */
static void
CalcCdsPrincAxesGibbs(Cds *cds, double **rotmat, const double *wts)
{
    double          evals[3] = {0.0, 0.0, 0.0};
    double          det;
    int             i, j;

    CdsInnProd2(cds, wts);
    jacobi3_cyc(cds->innerprod2, evals, rotmat, 1e-8);
    EigenSort3b(rotmat, evals);

    det = Mat3Det((const double **) rotmat);

    if (det < 0)
    {
        for (i = 0; i < 3; ++i)
        {
            if (rotmat[i][i] < 0)
            {
                /* negate one row only, which changes the sign of det */
                for (j = 0; j < 3; ++j)
                    rotmat[i][j] *= -1.0;

                break;
            }
        }
    }
}


/* Shifts x by whole multiples of 2*pi until it lies in [-pi, pi].
   Upward shifts are applied first, then downward ones. */
static double
wrap_nPI_pPI(double x)
{
    for (; x < -MY_PI; x += 2.0*MY_PI)
        ;

    for (; x > MY_PI; x -= 2.0*MY_PI)
        ;

    return(x);
}


static double 
mardia_gadsden_target_ratio(const double a, const double b, const double x, const double y)
{
    return(exp(a * (cos(y) - cos(x)) + b * (sin(y) - sin(x))) * cos(y) / cos(x));
}


/* One random-walk Metropolis step for the angle x.  A Gaussian increment with
   standard deviation width is added, the proposal is wrapped into [-pi, pi],
   and it is accepted when a uniform draw falls below
   mardia_gadsden_target_ratio().  Returns the accepted proposal or the
   unchanged x. */
double
mardia_gadsden_met3(const double a, const double b, const double x, const gsl_rng *r2, const double width)
{
    /* the Gaussian draw precedes the uniform draw */
    const double    proposal = wrap_nPI_pPI(x + gsl_ran_gaussian(r2, width));
    const double    ratio = mardia_gadsden_target_ratio(a, b, x, proposal);
    const double    draw = gsl_rng_uniform(r2);

    return((draw < ratio) ? proposal : x);
}


static double 
scale_target_ratio(const double n, const double gamma, const double phi, const double x, const double y)
{
    return(exp(-0.5*phi*(y*y - x*x) + gamma*(y-x)) * pow(y/x, n-1.0));
}


/* One random-walk Metropolis step for the scale factor x.  The proposal is
   x plus a Gaussian increment (standard deviation width); a negative proposal
   is reflected to its absolute value.  It is accepted when a uniform draw is
   below scale_target_ratio().  The loc argument is not used. */
double
scale_met3(const double n, const double gamma, const double phi, const double x, const gsl_rng *r2, double loc, double width)
{
    double          proposal, ratio, draw;

    proposal = x + gsl_ran_gaussian(r2, width);

    /* reflect at zero */
    if (proposal < 0.0)
        proposal = -proposal;

    ratio = scale_target_ratio(n, gamma, phi, x, proposal);
    draw = gsl_rng_uniform(r2);

    return((draw < ratio) ? proposal : x);
}


static double 
scale_log_target_ratio(const double n, const double gamma, const double phi, const double x, const double y)
{
    double k = 2.0;
    double theta = 1.0/2.0;
    double hngamma = -0.5*phi*(y*y - x*x) + gamma*(y-x) + (n-1.0) * log(y/x);
    //hngamma = 0.0;
    double prior = (k-1.0) * log(y/x) - (y-x)/theta; // gamma prior
    
    return(hngamma + prior);
}


/* Runs iters log-scale Metropolis steps on the scale factor x and returns the
   final state.  Each step proposes x plus a Gaussian increment with standard
   deviation 3*width, reflects a negative proposal to its absolute value, and
   accepts when log(uniform) is below scale_log_target_ratio().  The loc
   argument is not used. */
double
scale_log_met3(const double n, const double gamma, const double phi, double x, const gsl_rng *r2, const double loc, const double width, const int iters)
{
    const double    propsd = 3.0 * width;
    double          step, candidate, logratio, logdraw;
    int             it;

    for (it = 0; it < iters; ++it)
    {
        step = gsl_ran_gaussian(r2, propsd);
        candidate = x + step;

        /* reflect at zero */
        if (candidate < 0.0)
           candidate = -candidate;

        logratio = scale_log_target_ratio(n, gamma, phi, x, candidate);
        logdraw = log(gsl_rng_uniform(r2));

        /* on rejection the chain keeps its current state */
        if (logdraw < logratio)
            x = candidate;
    }

    return(x);
}


/*
KVM
For simplicity of presentation take phi =1

Rejection-style sampler for the scale factor.  Works with the rescaled
parameter g = gamma / sqrt(phi), draws a gamma variate with shape r and scale
(b - g), and divides the accepted draw by sqrt(phi).  Every attempt is
printed to stdout.

NOTE(review): the acceptance bound R is recomputed on every pass but does not
depend on the proposal y -- confirm this is the intended acceptance test.
*/
double
scale_rejection(const double r, const double gamma, const double phi, const gsl_rng *r2)
{
    const double    rootphi = sqrt(phi);
    const double    g = gamma / rootphi;
    const double    b = 0.5 * (g + sqrt(g*g + 4.0 * r));
    double          bound, y, u;

    for (;;)
    {
        u = gsl_rng_uniform(r2);
        y = gsl_ran_gamma(r2, r, b - g);
        bound = exp(-0.25 * g * g) * pow(b - g, -r) / CalcUax(r - 0.5, -g);

        printf("\nREJ: %e %e %e", u, y, bound);
        fflush(NULL);

        /* stop as soon as "u >= bound" no longer holds */
        if (!(u >= bound))
            break;
    }

    return(y/rootphi);
}


/* D. J. Best and N. I. Fisher (1979)
   "Efficient simulation of the von Mises distribution."
   Applied Statistics 28:152-157.

   Draws one von Mises deviate with mean direction a and concentration b,
   wrapped into [-pi, pi].

   a  -- mean direction (radians)
   b  -- concentration; expected to be non-negative
   r2 -- GSL random number generator

   Degenerate concentration: for b == 0 the envelope parameter p is 0/0, and
   for very small b the term 1 + 4*b*b rounds to 1 so p becomes exactly 0 and
   r = 0.5*(1 + p*p)/p is infinite; either way the original loop produced NaN.
   The von Mises distribution tends to the circular uniform distribution as
   the concentration goes to zero, so in that case a uniform angle is
   returned.  All other inputs follow the unchanged Best-Fisher path. */
double
vonmises_dev3(const double a, const double b, const gsl_rng *r2)
{
    double          z, f, c, t, p, r;

    if (b == 0.0)
        return(wrap_nPI_pPI(a + MY_PI * (2.0 * gsl_rng_uniform(r2) - 1.0)));

    t = 1.0 + sqrt(1.0 + 4.0*b*b);
    p = 0.5*(t - sqrt(2.0*t))/b;

    /* p == 0 only when b is too small for 4*b*b to register next to 1 */
    if (p == 0.0)
        return(wrap_nPI_pPI(a + MY_PI * (2.0 * gsl_rng_uniform(r2) - 1.0)));

    r = 0.5*(1.0 + p*p)/p;

    /* rejection loop: propose f, accept when log(c/u) + 1 >= c */
    do
    {
        z = cos(MY_PI * gsl_rng_uniform(r2));
        f = (1.0 + r*z)/(r + z);
        c = b*(r-f);
    }
    while(log(c/gsl_rng_uniform(r2)) + 1.0 < c);

    /* pick the side of the mean direction with equal probability */
    if (gsl_rng_uniform(r2) > 0.5)
        return(wrap_nPI_pPI(a + acos(f)));
    else
        return(wrap_nPI_pPI(a - acos(f)));
}


/* Returns the reciprocal of a gsl_ran_gamma draw taken with first parameter c
   and second parameter 1/b. */
static double
invgamma_dev4(const double b, const double c, const gsl_rng *r2)
{
    const double    gammadraw = gsl_ran_gamma(r2, c, 1.0/b);

    return(1.0 / gammadraw);
}


/* For every atom, stores in cdsA->var[i] the summed squared distance of all
   structures from the average structure divided by 3*cnum.  The mean of those
   per-atom values goes to cdsA->stats->var and its square root to
   cdsA->stats->stddev. */
static void
VarCds(CdsArray *cdsA)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const double   *meanx = (const double *) cdsA->avecds->x,
                   *meany = (const double *) cdsA->avecds->y,
                   *meanz = (const double *) cdsA->avecds->z;
    Cds           **members = cdsA->cds;
    double         *atomvar = cdsA->var;
    double          dx, dy, dz, sumsq, total = 0.0;
    int             atom, s;

    for (atom = 0; atom < natoms; ++atom)
    {
        sumsq = 0.0;

        for (s = 0; s < nstruct; ++s)
        {
            dx = members[s]->x[atom] - meanx[atom];
            dy = members[s]->y[atom] - meany[atom];
            dz = members[s]->z[atom] - meanz[atom];
            sumsq += dx*dx + dy*dy + dz*dz;
        }

        /* three coordinates per structure contribute to each atom */
        atomvar[atom] = sumsq / (3.0 * nstruct);
        total += atomvar[atom];
    }

    total /= (double) natoms;
    cdsA->stats->stddev = sqrt(total);
    cdsA->stats->var = total;
}


/* Metropolis update of every structure's scale factor using the single
   variance cdsA->stats->var.  Prints the current mean scale first, then for
   each structure builds phi and gamma from the coordinate inner products
   (with the previous scale divided back out), runs 7 scale_log_met3 steps,
   and rescales the coordinates by new/old. */
static void
MetScale(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const int       n = 3.0 * natoms + 1.0;
    const int       nsteps = 7;
    const double    priorg = 0.0;
    const double    variance = cdsA->stats->var;
    Cds            *mean = cdsA->avecds;
    Cds            *member = NULL;
    double          phi, gamma, mode, propwidth, prevscale;
    double          meanscale = 0.0;
    int             s;

    for (s = 0; s < nstruct; ++s)
        meanscale += cdsA->cds[s]->scale;

    meanscale /= nstruct;
    printf("\nave: % f", meanscale);

    for (s = 0; s < nstruct; ++s)
    {
        member = cdsA->cds[s];
        prevscale = member->scale;

        /* here we have to undo the effects of the previous scaling step, for both phi and gamma
           phi is the self inner prod, so the scale is squared */
        phi = TrCdsInnerProd(member, natoms) / (variance * prevscale * prevscale);
        gamma = TrCdsInnerProd2(member, mean, natoms) / (variance * prevscale) - priorg;
        mode = ScaleMax(n, gamma, phi);
        propwidth = sqrt(1.0 / (phi + (n-1.0)/(mode*mode)));

        member->scale = scale_log_met3(n, gamma, phi, prevscale, r2, mode, propwidth, nsteps);
        ScaleCds(member, member->scale / prevscale);
    }
}


/* Metropolis update of every structure's scale factor using the per-atom
   weights cdsA->w.  Prints the mean scale and the mean log scale first, then
   for each structure builds phi and gamma from the weighted inner products
   (with the previous scale divided back out), runs 7 scale_log_met3 steps,
   and rescales the coordinates by new/old. */
static void
MetScaleDiag(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const int       n = 3.0 * natoms + 1.0;
    const int       nsteps = 7;
    const double    priorg = 0.0; /* set to 1.0 for exponential prior mean = 1 */
    double         *weights = cdsA->w;
    Cds            *mean = cdsA->avecds;
    Cds            *member = NULL;
    double          phi, gamma, mode, propwidth, prevscale;
    double          accum;
    int             s;

    accum = 0.0;
    for (s = 0; s < nstruct; ++s)
        accum += cdsA->cds[s]->scale;

    accum /= nstruct;
    printf("\nave: % f", accum);

    accum = 0.0;
    for (s = 0; s < nstruct; ++s)
        accum += log(cdsA->cds[s]->scale);

    accum /= nstruct;
    printf("\nave log: % f", accum);

    for (s = 0; s < nstruct; ++s)
    {
        member = cdsA->cds[s];
        prevscale = member->scale;

        /* here we have to undo the effects of the previous scaling step, for both phi and gamma
           phi is the self inner prod, so the scale is squared */
        phi = TrCdsInnerProdWt(member, natoms, weights) / (prevscale * prevscale);
        gamma = TrCdsInnerProdWt2(member, mean, natoms, weights) / prevscale - priorg;
        mode = ScaleMax(n, gamma, phi);
        propwidth = sqrt(1.0 / (phi + (n-1.0)/(mode*mode)));

        /* alternative sampler, currently disabled: scale_rejection(n, gamma, phi, r2) */
        member->scale = scale_log_met3(n, gamma, phi, prevscale, r2, mode, propwidth, nsteps);
        ScaleCds(member, member->scale / prevscale);
    }
}


/* Recomputes the variances with VarCds, then replaces cdsA->stats->var by an
   invgamma_dev4 draw with arguments (1.5*cnum*vlen*var, 1.5*cnum*vlen). */
static void
GibbsVar(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const double    halfdof = 1.5 * nstruct * natoms;
    double          current;

    VarCds(cdsA);
    current = cdsA->stats->var;

    cdsA->stats->var = invgamma_dev4(halfdof * current, halfdof, r2);
}


/* Draws cdsA->stats->phi from gsl_ran_gamma with first parameter
   (vlen + 2)/2 and second parameter 2 / (sum_i 1/var[i] + 2/alpha). */
static void
GibbsPhi(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       natoms = cdsA->vlen;
    double          precsum = 0.0;
    double          gshape, gscale;
    int             atom;

    for (atom = 0; atom < natoms; ++atom)
        precsum += 1.0 / cdsA->var[atom];

    gshape = 0.5 * (natoms + 2.0);
    gscale = 2.0 / (precsum + 2.0 / cdsA->stats->alpha);

    cdsA->stats->phi = gsl_ran_gamma(r2, gshape, gscale);
}


/* Recomputes the per-atom variances with VarCds, replaces each one by an
   invgamma_dev4 draw with arguments (0.5*(3*cnum*var[i] + phi), 1.5*cnum + 0.5),
   and sets the weight w[i] to the reciprocal of the new variance. */
static void
GibbsVarDiag(CdsArray *cdsA, const gsl_rng *r2)
{
    const double    hyperphi = cdsA->stats->phi;
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    int             atom;

    VarCds(cdsA);

    for (atom = 0; atom < natoms; ++atom)
    {
        cdsA->var[atom] = invgamma_dev4(0.5 * (3.0 * nstruct * cdsA->var[atom] + hyperphi), 1.5 * nstruct + 0.5, r2);
        cdsA->w[atom] = 1.0 / cdsA->var[atom];
    }
}


/* Sets the average structure to the arithmetic mean of all structures
   (AveCdsGibbs), adds Gaussian noise with standard deviation
   sqrt(stats->var / cnum) to every coordinate, and then calls CenMass on it. */
static void
GibbsMean(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const double    noisesd = sqrt(cdsA->stats->var / nstruct);
    double         *meanx = cdsA->avecds->x,
                   *meany = cdsA->avecds->y,
                   *meanz = cdsA->avecds->z;
    int             atom;

    AveCdsGibbs(cdsA);

    /* draws are consumed in x, y, z order for each atom */
    for (atom = 0; atom < natoms; ++atom)
    {
        meanx[atom] += gsl_ran_gaussian(r2, noisesd);
        meany[atom] += gsl_ran_gaussian(r2, noisesd);
        meanz[atom] += gsl_ran_gaussian(r2, noisesd);
    }

    CenMass(cdsA->avecds);
}


/* Per-atom-variance version of GibbsMean: after AveCdsGibbs, each atom of the
   average structure gets Gaussian noise with standard deviation
   sqrt(var[i] / cnum); CenMassWtIp is then called with the weights cdsA->w and
   the resulting center is printed to stdout. */
static void
GibbsMeanDiag(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    Cds            *mean = cdsA->avecds;
    double         *meanx = mean->x,
                   *meany = mean->y,
                   *meanz = mean->z;
    double          noisesd;
    int             atom;

    AveCdsGibbs(cdsA);

    for (atom = 0; atom < natoms; ++atom)
    {
        noisesd = sqrt(cdsA->var[atom] / (nstruct));

        /* draws are consumed in x, y, z order for each atom */
        meanx[atom] += gsl_ran_gaussian(r2, noisesd);
        meany[atom] += gsl_ran_gaussian(r2, noisesd);
        meanz[atom] += gsl_ran_gaussian(r2, noisesd);
    }

    CenMassWtIp(mean, cdsA->w);

    printf("\ntrans: % f % f % f",
           mean->center[0], mean->center[1], mean->center[2]);
    fflush(NULL);
}


/* Writes the current structures to "gibbs_NNNNN.pdb" and the average to
   "gibbs_ave_NNNNN.pdb" (NNNNN = zero-padded iter).  For the duration of the
   write all structures are divided by the geometric mean of their scale
   factors, which is undone afterwards.  Ends with a GibbsMean update. */
static void
GibbsMeanWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter)
{
    const int       nstruct = cdsA->cnum;
    char            filename[256], avename[256];
    double          geomean = 0.0;
    int             s;

    /* geometric mean = exp of the mean log scale */
    for (s = 0; s < nstruct; ++s)
        geomean += log(cdsA->cds[s]->scale);

    geomean = exp(geomean/nstruct);

    for (s = 0; s < nstruct; ++s)
        ScaleCds(cdsA->cds[s], 1.0/geomean);

    sprintf(filename, "%s%05d.pdb", "gibbs_", iter);
    WriteTheseusCdsModelFile(cdsA, filename);
    sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter);
    WriteAveCds(cdsA, avename);

    /* restore the original scaling */
    for (s = 0; s < nstruct; ++s)
        ScaleCds(cdsA->cds[s], geomean);

    GibbsMean(cdsA, r2);
}


/* Runs a GibbsMeanDiag update, writes the structures to "gibbs_NNNNN.pdb" and
   the average to "gibbs_ave_NNNNN.pdb" (NNNNN = zero-padded iter), then runs a
   second GibbsMeanDiag update. */
static void
GibbsMeanDiagWrite(CdsArray *cdsA, const gsl_rng *r2, const int iter)
{
    char            filename[256], avename[256];

    GibbsMeanDiag(cdsA, r2);

    /* model file first, then the average structure */
    sprintf(filename, "%s%05d.pdb", "gibbs_", iter);
    WriteTheseusCdsModelFile(cdsA, filename);

    sprintf(avename, "%s%05d.pdb", "gibbs_ave_", iter);
    WriteAveCds(cdsA, avename);

    GibbsMeanDiag(cdsA, r2);
}


/* Subtracts the 3-vector avecen from cdsi->center, component by component. */
void
ModCenMass(Cds *cdsi, double *avecen)
{
    int             axis;

    for (axis = 0; axis < 3; ++axis)
        cdsi->center[axis] -= avecen[axis];
}


/* Translation update with a single variance.  For each structure: CenMass is
   called, Gaussian noise with standard deviation sqrt(var / vlen) / scale is
   added to each component of its center, and ApplyCenterIp is called. */
static void
GibbsTrans(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    const double    basesd = sqrt(cdsA->stats->var / natoms);
    Cds            *member = NULL;
    double          noisesd;
    int             s, axis;

    for (s = 0; s < nstruct; ++s)
    {
        member = cdsA->cds[s];
        CenMass(member);

        /* the spread shrinks with the structure's own scale factor */
        noisesd = basesd/member->scale;

        for (axis = 0; axis < 3; ++axis)
            member->center[axis] += gsl_ran_gaussian(r2, noisesd);

        ApplyCenterIp(member);
    }
}


/* Translation update with per-atom variances.  The noise standard deviation
   is sqrt(1 / sum_i (1/var[i])).  For each structure: CenMassWtIp is called
   with the weights cdsA->w, Gaussian noise is added to each component of its
   center, and ApplyCenterIp is called.  Unlike GibbsTrans, the noise is not
   divided by the structure's scale factor. */
static void
GibbsTransDiag(CdsArray *cdsA, const gsl_rng *r2)
{
    const int       nstruct = cdsA->cnum, natoms = cdsA->vlen;
    Cds            *member = NULL;
    double          precsum = 0.0;
    double          noisesd;
    int             idx, axis;

    for (idx = 0; idx < natoms; ++idx)
        precsum += (1.0 / cdsA->var[idx]);

    noisesd = sqrt(1.0 / precsum);

    for (idx = 0; idx < nstruct; ++idx)
    {
        member = cdsA->cds[idx];
        CenMassWtIp(member, cdsA->w);

        for (axis = 0; axis < 3; ++axis)
            member->center[axis] += gsl_ran_gaussian(r2, noisesd);

        ApplyCenterIp(member);
    }
}


/* Writes into the 9-element array A the sums over atoms of the products of
   the (optionally weighted) cds1 coordinates with the cds2 coordinates:

       A[3*r + c] = sum_i  w_i * cds1.{x,y,z}[c]_i * cds2.{x,y,z}[r]_i

   i.e. A[0..2] pair cds1's x, y, z with cds2's x; A[3..5] with cds2's y;
   A[6..8] with cds2's z.  A NULL weight pointer means every weight is 1. */
static void
CdsInnerProduct(double *A, Cds *cds1, Cds *cds2, const int vlen, const double *weight)
{
    const double   *px = cds1->x, *py = cds1->y, *pz = cds1->z;
    const double   *qx = cds2->x, *qy = cds2->y, *qz = cds2->z;
    double          w, wx, wy, wz, ux, uy, uz;
    int             atom, cell;

    for (cell = 0; cell < 9; ++cell)
        A[cell] = 0.0;

    for (atom = 0; atom < vlen; ++atom)
    {
        /* multiplying by exactly 1.0 leaves the unweighted values untouched */
        w = (weight != NULL) ? weight[atom] : 1.0;

        wx = w * px[atom];
        wy = w * py[atom];
        wz = w * pz[atom];

        ux = qx[atom];
        uy = qy[atom];
        uz = qz[atom];

        A[0] +=  (wx * ux);
        A[1] +=  (wy * ux);
        A[2] +=  (wz * ux);

        A[3] +=  (wx * uy);
        A[4] +=  (wy * uy);
        A[5] +=  (wz * uy);

        A[6] +=  (wx * uz);
        A[7] +=  (wy * uz);
        A[8] +=  (wz * uz);
    }
}


static void
MardiaRot3(double *R, const double *t)
{
    double  c1, c2, c3, s1, s2, s3;

    c1 = cos(t[0]);
    c2 = cos(t[1]);
    c3 = cos(t[2]);
    s1 = sin(t[0]);
    s2 = sin(t[1]);
    s3 = sin(t[2]);

    R[0] = c1*c2;
    R[1] = c2*s1;
    R[2] = s2;

    R[3] = -c3*s1 - c1*s2*s3;
    R[4] = c1*c3 - s1*s2*s3;
    R[5] = c2*s3;

    R[6] = s1*s3 - c1*c3*s2;
    R[7] = -c3*s1*s2 - c1*s3;
    R[8] = c2*c3;

//     R[0] = c1*c2;
//     R[1] = -c2*s1;
//     R[2] = -s2;
// 
//     R[3] = c3*s1 - c1*s2*s3;
//     R[4] = c1*c3 - s1*s2*s3;
//     R[5] = -c2*s3;
// 
//     R[6] = s1*s3 + c1*c3*s2;
//     R[7] = -c3*s1*s2 + c1*s3;
//     R[8] = c2*c3;
}


/* See:
   Green and Mardia (2006)
   "Bayesian alignment using hierarchical models, with applications in protein bioinformatics"
   Biometrika 93(2):235-254
   Esp. pp 241-242.

   Rotation update for every structure in cdsA.  For structure i:
     - F is the inner product of the average structure with structure i
       (weighted by cdsA->w when algo->varweight > 0), divided by 2, or by
       2*var in the unweighted case;
     - theta[i][0] and theta[i][2] are drawn with vonmises_dev3;
     - theta[i][1] is updated with 7 mardia_gadsden_met3 Metropolis steps;
     - the matrix from MardiaRot3 is stored in cds[i]->matrix, transposed in
       place, and applied to the structure with RotateCdsIp.

   cdsA  -- structure array; coordinates and cds[i]->matrix are modified
   theta -- per-structure angle triples, overwritten with the new draws
   r2    -- GSL random number generator
*/
void
GibbsMetRot(CdsArray *cdsA, double **theta, const gsl_rng *r2)
{
    int             i, j, k;
    const int       cnum = cdsA->cnum, vlen = cdsA->vlen;
    double          var = cdsA->stats->var;
    double        **F = cdsA->tmpmat3a;
    Cds         *ave = cdsA->avecds;
    double          a12, b12, a13, b13, a23, b23;
    double          kap12, kap23, m, t12, t13, t23, width;
    double        **rotmat = NULL;

    for (i = 0; i < cnum; ++i)
    {
        //t12 = theta[i][0], t13 = theta[i][1], t23 = theta[i][2];
        t12 = t13 = t23 = 0.0; /* I'm fairly convinced that this is valid, since when I rotate the structures below, */
                               /* I simply offset the structures by a "location parameter". */
                               /* I do the same for the translations. */
                               /* We can't do this if the chains are run out of place */

/*         printf("\n\n****************\nstructure: %d", i+1); */
/*         printf("\nF:"); */
/*         Mat3Print(F); */

        /* F is written through &F[0][0] as 9 consecutive doubles */
        if (cdsA->algo->varweight > 0)
        {
            CdsInnerProduct(&F[0][0], ave, cdsA->cds[i], vlen, cdsA->w);

            for (j = 0; j < 3; ++j)
                for (k = 0; k < 3; ++k)
                    F[j][k] /= 2.0;
        }
        else
        {
            CdsInnerProduct(&F[0][0], ave, cdsA->cds[i], vlen, NULL);

            for (j = 0; j < 3; ++j)
                for (k = 0; k < 3; ++k)
                    F[j][k] /= (2.0 * var);
        }

        /* first angle: von Mises draw with mean atan2(b12, a12) and concentration kap12 */
        a12 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t23) + (-F[1][2] - sin(t13) * F[0][1]) * sin(t23) + cos(t13) * F[0][0];
        b12 = (-F[0][1] - sin(t13) * F[1][2]) * cos(t23) + ( F[0][2] - sin(t13) * F[1][1]) * sin(t23) + cos(t13) * F[1][0];

        kap12 = sqrt(a12*a12 + b12*b12);
        m = atan2(b12, a12);
        //printf("\nkap12: % e  m:% e", kap12, m);
        t12 = theta[i][0] = vonmises_dev3(m, kap12, r2);
        //t12 = theta[i][0] = 0.0;

        /* third angle: von Mises draw that uses the freshly drawn t12 */
        a23 = ( F[1][1] - sin(t13) * F[0][2]) * cos(t12) + (-F[0][1] - sin(t13) * F[1][2]) * sin(t12) + cos(t13) * F[2][2];
        b23 = (-F[1][2] - sin(t13) * F[0][1]) * cos(t12) + ( F[0][2] - sin(t13) * F[1][1]) * sin(t12) + cos(t13) * F[2][1];

        kap23 = sqrt(a23*a23 + b23*b23);
        m = atan2(b23, a23);
        //printf("\nkap23: % e  m:% e", kap23, m);
        t23 = theta[i][2] = vonmises_dev3(m, kap23, r2);
        //t23 = theta[i][2] = 0.0;

        /* middle angle: Metropolis steps, starting from t13 = 0 */
        a13 = sin(t12) * F[1][0] + cos(t12) * F[0][0] + sin(t23) * F[2][1] + cos(t23) * F[2][2];
        b13 = (-sin(t23) * F[0][1] - cos(t23) * F[0][2]) * cos(t12) + (-sin(t23) * F[1][1] - cos(t23) * F[1][2]) * sin(t12) + F[2][0];

        /* proposal width; NOTE(review): divides by kap12 and kap23, so a zero
           concentration makes the width infinite */
        width = sqrt(2.0 *(1.0/kap12 + 1.0/kap23));

        for (j = 0; j < 7; ++j)
            t13 = mardia_gadsden_met3(a13, b13, t13, r2, width);

        theta[i][1] = t13;
        //theta[i][1] = 0.0;

/*         printf("\ntheta: % f % f % f\n", theta[i][0], theta[i][1], theta[i][2]); */

        /* build the matrix directly in the structure's own storage */
        rotmat = cdsA->cds[i]->matrix;
        MardiaRot3(&rotmat[0][0], theta[i]);

/*         printf("\nrotmat:"); */
/*         Mat3Print(rotmat); */
/*         fflush(NULL); */
/*  */
/*         if (VerifyRotMat(rotmat, 1e-6) == 0) */
/*         { */
/*             printf("\nBAD ROTATION MATRIX\n\n"); */
/*             exit(EXIT_FAILURE); */
/*         } */

        /* the transposed matrix is what gets applied to the coordinates */
        Mat3TransposeIp(rotmat);

        RotateCdsIp(cdsA->cds[i], (const double **) rotmat);
    }
}


/* Builds a 3x3 matrix in rotmat from the SVD factors Umat, s and Vtmat.

   rotmat is first cleared with a single memset of nine doubles starting at
   rotmat[0], so its rows must be stored contiguously.

   When det(Umat) * det(Vtmat) is positive, every entry is
       rotmat[i][j] = sum_k Vtmat[k][i] * s[k] * Umat[j][k]
   and 1 is returned.  Otherwise the first two terms are accumulated the same
   way, the third term is subtracted (without its s[2] factor), and -1 is
   returned. */
static int
CalcRotMat(double **rotmat, double **Umat, double *s, double **Vtmat)
{
    int         row, col, n;
    double      detprod;

    memset(&rotmat[0][0], 0, 9 * sizeof(double));

    detprod = Mat3Det((const double **)Umat) * Mat3Det((const double **)Vtmat);

    if (detprod > 0)
    {
        for (row = 0; row < 3; ++row)
        {
            for (col = 0; col < 3; ++col)
            {
                for (n = 0; n < 3; ++n)
                    rotmat[row][col] += (Vtmat[n][row] * s[n] * Umat[col][n]);
            }
        }

        return(1);
    }

    /* non-positive determinant product: flip the sign of the last term */
    for (row = 0; row < 3; ++row)
    {
        for (col = 0; col < 3; ++col)
        {
            rotmat[row][col] += (Vtmat[0][row] * s[0] * Umat[col][0]);
            rotmat[row][col] += (Vtmat[1][row] * s[1] * Umat[col][1]);
            rotmat[row][col] -= (Vtmat[2][row] * Umat[col][2]);
        }
    }

    return(-1);
}


/* Thin wrapper around svdGSLDest() for 3x3 matrices.

   Calls svdGSLDest(Rmat, 3, sigma, VTmat), transposes VTmat in place, and
   then copies Rmat into Umat.  Always returns 1.

   NOTE(review): presumably svdGSLDest() overwrites Rmat with the left
   singular vectors and returns V (untransposed) in its last argument --
   confirm against ProcGSLSVD. */
static int
CalcGSLSVD(double **Rmat, double **Umat, double *sigma, double **VTmat)
{
    const int       dim = 3;

    svdGSLDest(Rmat, dim, sigma, VTmat);
    Mat3TransposeIp(VTmat);
    Mat3Cpy(Umat, (const double **) Rmat);

    return 1;
}


/* See:
   Habeck (2009)
   "Generation of three-dimensional random rotations in fitting and matching problems."
   Comput Stat 24:719-731

   For every structure i in cdsA this routine
     1. forms the 3x3 inner product A between the average coordinates and
        structure i (weighted by cdsA->w when algo->varweight > 0),
     2. scales A by 0.5 (weighted case) or by 1/(2*var) (unweighted case),
     3. calls CalcGSLSVD() to obtain lambda and V,
     4. runs 7 sweeps that redraw the angles alpha, beta, gamma,
     5. stores the angles in theta[i], builds the matrix with MardiaRot3(),
        transposes it and applies it to the structure with RotateCdsIp().

   cdsA  -- coordinate array; tmpmat3a and tmpmat3b are used as scratch
   theta -- one angle triple per structure; theta[i][1] seeds beta and all
            three entries are overwritten
   r2    -- GSL random number generator

   NB: broken, for now -- probably a transpose issue somewhere, either rotmat or V (should be Vt?)
*/
void
GibbsRot(CdsArray *cdsA, double **theta, const gsl_rng *r2)
{
    int             i, j, k;
    const int       cnum = cdsA->cnum, vlen = cdsA->vlen;
    double          var = cdsA->stats->var;
    double        **A = cdsA->tmpmat3a;
    Cds            *ave = cdsA->avecds;
    double          alpha, beta, gamma, phi, psi, u, r, x, kappab, kappaphi, kappapsi, tmpa, tmpb;
    double        **rotmat = NULL;
    double        **V = cdsA->tmpmat3b;
    double          lambda[3];      /* singular values; stack storage, nothing to free */
    double          a = 0.0;        /* mean direction passed to vonmises_dev() */

    alpha = gamma = 0.0;

    for (i = 0; i < cnum; ++i)
    {
        if (cdsA->algo->varweight > 0)
            CdsInnerProduct(&A[0][0], ave, cdsA->cds[i], vlen, cdsA->w);
        else
            CdsInnerProduct(&A[0][0], ave, cdsA->cds[i], vlen, NULL);

        if (cdsA->algo->varweight > 0)
        {
            for (j = 0; j < 3; ++j)
                for (k = 0; k < 3; ++k)
                    A[j][k] *= 0.5;
        }
        else
        {
            for (j = 0; j < 3; ++j)
                for (k = 0; k < 3; ++k)
                    A[j][k] /= (2.0 * var);
        }

        rotmat = cdsA->cds[i]->matrix;

        /* NOTE(review): this hands rotmat (not A) to the SVD and receives the
           copy in A, which discards the inner product computed above; the
           alternative svdGSLDest(A, 3, lambda, V) was left commented out in
           the original -- confirm which input is intended. */
        CalcGSLSVD(rotmat, A, lambda, V);
        //svdGSLDest(A, 3, lambda, V);

        beta = theta[i][1];

        /* The sweep counter must be distinct from i: reusing i here left
           i == 7 after the loop, so theta[7] and cdsA->cds[7] were touched
           (out of bounds for cnum <= 7) and the remaining structures were
           skipped. */
        for (j = 0; j < 7; ++j)
        {
            tmpa = cos(0.5 * beta);
            tmpb = sin(0.5 * beta);

            kappaphi = tmpa*tmpa * (lambda[0] + lambda[1]);
            kappapsi = tmpb*tmpb * (lambda[0] - lambda[1]);

            phi = vonmises_dev(a, kappaphi, r2);
            psi = vonmises_dev(a, kappapsi, r2);
            u = gsl_ran_bernoulli (r2, 0.5);

            alpha = 0.5 * (phi + psi) + M_PI * u;
            gamma = 0.5 * (phi - psi) + M_PI * u;

            kappab = (lambda[0] + lambda[1]) * cos(phi) + (lambda[0] - lambda[1]) * cos(psi) + 2.0 * lambda[2];
            r = gsl_rng_uniform(r2);
            /* inverse-CDF draw of x = cos(beta) from a density proportional to exp(kappab * x / 2) on [-1, 1] */
            x = 1.0 + 2.0 * log(r + (1.0 -r) * exp(-kappab))/kappab;
            beta = acos(x);
        }

        /* NOTE(review): rotmat is handed to MardiaRot3() right below, which
           presumably overwrites the result of this call -- confirm. */
        CalcRotMat(rotmat, A, lambda, V);

        theta[i][0] = alpha;
        theta[i][1] = beta;
        theta[i][2] = gamma;
        MardiaRot3(&rotmat[0][0], theta[i]);

/*         printf("\nrotmat:"); */
/*         Mat3Print(rotmat); */
/*         fflush(NULL); */
/*  */
/*         if (VerifyRotMat(rotmat, 1e-6) == 0) */
/*         { */
/*             printf("\nBAD ROTATION MATRIX\n\n"); */
/*             exit(EXIT_FAILURE); */
/*         } */

        theta[i][1] = beta;

        Mat3TransposeIp(rotmat);

        RotateCdsIp(cdsA->cds[i], (const double **) rotmat);
    }
}


/* Appends one sample record to paramfile.

   The record is bracketed by "BEG SAMPLE <iter>" and "END SAMPLE <iter>"
   lines and contains, in order: LOGL; ALPHA and PHI (when
   algo->varweight > 0) or PHI alone, printed from stats->var (otherwise);
   one VAR line per position; one MEAN line per position; one TRANS, ROT,
   THETA and QUAT line per structure; and, when algo->scale > 0, one SCALE
   line per structure.

   paramfile -- open, writable stream
   cdsA      -- source of all printed values
   theta     -- per-structure angle triples printed on the THETA lines
   iter      -- iteration number printed on the BEG/END lines */
static void
WriteSample(FILE *paramfile, CdsArray *cdsA, double **theta, const int iter)
{
    int             j, k;
    double          quat[4];    /* scratch quaternion; on the stack so no allocation can fail */
    const int       cnum = cdsA->cnum, vlen = cdsA->vlen;

    fprintf(paramfile, "%-12s %12d\n", "BEG SAMPLE", iter);
    fprintf(paramfile, "%-12s %18.9e\n", "LOGL", cdsA->stats->logL);

    if (cdsA->algo->varweight > 0)
    {
        fprintf(paramfile, "%-12s %18.9e\n", "ALPHA", cdsA->stats->alpha);
        fprintf(paramfile, "%-12s %18.9e\n", "PHI", cdsA->stats->phi);
    }
    else
    {
        fprintf(paramfile, "%-12s %18.9e\n", "PHI", cdsA->stats->var);
    }

    for (j = 0; j < vlen; ++j)
        fprintf(paramfile, "%-12s %6d %18.9e\n", "VAR", j+1, cdsA->var[j]);

    for (j = 0; j < vlen; ++j)
        fprintf(paramfile, "%-12s %6d % 14.9f % 14.9f % 14.9f\n", "MEAN", 
                j+1, cdsA->avecds->x[j], cdsA->avecds->y[j], cdsA->avecds->z[j]);

    for (k = 0; k < cnum; ++k)
        fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e\n", "TRANS", 
                k+1, cdsA->cds[k]->center[0], cdsA->cds[k]->center[1], cdsA->cds[k]->center[2]);

    /* each ROT line carries the three matrix rows one after another */
    for (k = 0; k < cnum; ++k)
    {
        fprintf(paramfile, "%-12s %6d", "ROT", k+1);

        for (j = 0; j < 3; ++j)
        {
            fprintf(paramfile,
                    //"% 12.3e % 12.3e % 12.3e",
                    "% 12.9f % 12.9f % 12.9f    ",
                    cdsA->cds[k]->matrix[j][0],
                    cdsA->cds[k]->matrix[j][1],
                    cdsA->cds[k]->matrix[j][2]);
        }

        fputc('\n', paramfile);
    }

    for (k = 0; k < cnum; ++k)
        fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e\n", "THETA", 
                k+1, theta[k][0], theta[k][1], theta[k][2]);

    for (k = 0; k < cnum; ++k)
    {
        RotMatToQuaternion((const double **) cdsA->cds[k]->matrix, quat);
        fprintf(paramfile, "%-12s %6d % 18.9e % 18.9e % 18.9e % 18.9e\n", "QUAT", 
                k+1, quat[0], quat[1], quat[2], quat[3]);
    }

    if (cdsA->algo->scale > 0)
        for (j = 0; j < cnum; ++j)
            fprintf(paramfile, "%-12s %6d %18.9e\n", "SCALE", j+1, cdsA->cds[j]->scale);

    fprintf(paramfile, "%-12s %12d\n", "END SAMPLE", iter);
}


/* Randomizes the starting state of the sampler.

   Draws a structure index uniformly from r2, resets stats->phi to 0 and
   every entry of var[] and w[] to 1, applies RandRotCdsArray() and
   RandTransCdsArray() (with argument 1000.0) to the whole array, and
   finally copies the drawn structure into avecds. */
void
RandInitGibbs(CdsArray *cdsA, const gsl_rng *r2)
{
    const int      ncds = cdsA->cnum, len = cdsA->vlen;
    int            pick, pos;

    /* the index is drawn first so the generator is consumed in the same order */
    pick = (int) (gsl_rng_uniform(r2) * ncds);

    cdsA->stats->phi = 0.0;

    for (pos = 0; pos < len; ++pos)
    {
        cdsA->var[pos] = 1.0;
        cdsA->w[pos] = 1.0;
    }

    RandRotCdsArray(cdsA, r2);
    RandTransCdsArray(cdsA, 1000.0, r2);

    CdsCopyAll(cdsA->avecds, cdsA->cds[pick]);
}


/* Returns the variance-weighted sum of squared deviations from the average:
       sum over positions k, structures m of
           |cds[m](k) - avecds(k)|^2 / var[k]
   Positions form the outer loop and structures the inner loop. */
static double
CalcFrobTerm(CdsArray *cdsA)
{
    const int       ncds = cdsA->cnum, len = cdsA->vlen;
    const Cds      *mean = cdsA->avecds;
    const Cds      *cur = NULL;
    double          total = 0.0;
    double          dx, dy, dz;
    int             pos, c;

    for (pos = 0; pos < len; ++pos)
    {
        for (c = 0; c < ncds; ++c)
        {
            cur = cdsA->cds[c];

            dx = cur->x[pos] - mean->x[pos];
            dy = cur->y[pos] - mean->y[pos];
            dz = cur->z[pos] - mean->z[pos];

            total += (dx*dx + dy*dy + dz*dz) / cdsA->var[pos];
        }
    }

    return(total);
}


/* Returns the unweighted sum of squared deviations from the average:
       sum over positions k, structures m of |cds[m](k) - avecds(k)|^2
   Positions form the outer loop and structures the inner loop. */
static double
CalcFrobTermIso(CdsArray *cdsA)
{
    const int       ncds = cdsA->cnum, len = cdsA->vlen;
    const Cds      *mean = cdsA->avecds;
    const Cds      *cur = NULL;
    double          total = 0.0;
    double          dx, dy, dz;
    int             pos, c;

    for (pos = 0; pos < len; ++pos)
    {
        for (c = 0; c < ncds; ++c)
        {
            cur = cdsA->cds[c];

            dx = cur->x[pos] - mean->x[pos];
            dy = cur->y[pos] - mean->y[pos];
            dz = cur->z[pos] - mean->z[pos];

            total += (dx*dx + dy*dy + dz*dz);
        }
    }

    return(total);
}


/* Returns invgamma_logL() evaluated on the vlen entries of cdsA->var with
   stats->phi as the third argument and 0.5 as the fourth. */
static double
CalcHierarchPrLogL(CdsArray *cdsA)
{
    const double    phi = cdsA->stats->phi;
    const int       len = cdsA->vlen;

    return(invgamma_logL((const double *) cdsA->var, len, phi, 0.5));
}


/* Returns 3 * vlen * sum_i log(cds[i]->scale), summed over all cnum
   structures. */
static double
CalcLogScaleJacob(CdsArray *cdsA)
{
    double         logsum = 0.0;
    int            c = 0;

    while (c < cdsA->cnum)
    {
        logsum += log(cdsA->cds[c]->scale);
        ++c;
    }

    return(3.0 * cdsA->vlen * logsum);
}

/* Computes the log-likelihood of the current state, stores it in
   stats->logL and returns it.

       logL = scales - frobterm/2 - (3*cnum*vlen/2) * log(2*pi)
              - (3*cnum/2) * lndet + igL

   where scales comes from CalcLogScaleJacob() and
     - algo->leastsquares == 1:  lndet = vlen * log(stats->var),
                                 frobterm = CalcFrobTermIso() / stats->var,
                                 igL = 0;
     - else algo->varweight == 1: lndet = sum_k log(var[k]),
                                 frobterm = CalcFrobTerm(),
                                 igL = CalcHierarchPrLogL();
     - otherwise all three terms stay 0. */
static double
CalcLogLGibbs(CdsArray *cdsA)
{
    Algorithm      *algo = cdsA->algo;
    Statistics     *stats = cdsA->stats;
    const int       len = cdsA->vlen;
    const double    ncds = cdsA->cnum;
    const double    nk = ncds * len;
    const double    nd = ncds * 3.0;
    const double    ndk = nk * 3.0;
    const double    halfndk = 0.5 * ndk;
    double          logdet = 0.0, frob = 0.0, prior = 0.0, jacob;
    int             pos;

    if (algo->leastsquares == 1)
    {
        logdet = len * log(stats->var);
        frob = CalcFrobTermIso(cdsA) / stats->var;
    }
    else if (algo->varweight == 1)
    {
        for (pos = 0; pos < len; ++pos)
            logdet += log(cdsA->var[pos]);

        frob = CalcFrobTerm(cdsA);
        prior = CalcHierarchPrLogL(cdsA);
    }

    jacob = CalcLogScaleJacob(cdsA);

    stats->logL = jacob
                - (0.5 * frob)
                - (halfndk * log(2.0 * MY_PI))
                - (0.5 * nd * logdet)
                + prior;

    return(stats->logL);
}


/* Integrand callback with the (double, void *) signature expected by
   gsl_function.

   params points at four doubles {n, gamma, phi, r}; the return value is
   x^r * CalcHalfNormChiLik(x, n, gamma, phi). */
double 
f(double x, void *params)
{
    const double   *args = (const double *) params;
    const double    p_n = args[0];
    const double    p_gamma = args[1];
    const double    p_phi = args[2];
    const double    p_r = args[3];

    return pow(x, p_r) * CalcHalfNormChiLik(x, p_n, p_gamma, p_phi);
}


/* Runs the Gibbs/Metropolis sampler on cdsA and reports summary statistics.

   Steps:
     1. Allocates per-iteration log-likelihood storage (algo->bayes entries),
        seeds a ranlxs2 generator from time(NULL) and opens
        "<rootname>_gibbs.p" for writing (exits on failure).
     2. Initializes the weights w[] (and stats->alpha / stats->phi in the
        varweight case), rotates the average and every structure with the
        transposed matrix from CalcCdsPrincAxesGibbs(), and calls VarCds().
     3. For i = 1..burn, updates all parameters in turn, records
        CalcLogLGibbs() in liks[i-1], writes a sample every 100 iterations
        and the mean every 1000 iterations.
     4. Skipping the first `burnin` (100) entries of liks, prints three
        marginal likelihood summaries (log of the arithmetic mean, log of
        the harmonic mean, and the mean log-likelihood) followed by the
        iterative Newton & Raftery estimate.
     5. Runs a diagnostic section with hard-coded n = phi = gamma = 100:
        writes 100000 draws of scale_log_met3() to "metropolis.txt",
        creates "rejection.txt", prints CalcNormConstMm() and numerically
        integrated moments of f().

   NOTE(review): if burn <= burnin the averages in step 4 divide by a
   non-positive sample count, as in the original. */
void
GibbsMet(CdsArray *cdsA)
{
    int                     i;
    const int               cnum = cdsA->cnum, vlen = cdsA->vlen;
    int                     burn = cdsA->algo->bayes;
    char                    paramfname[256];
    double                **theta = MatAlloc(cnum, 3);
    const gsl_rng_type     *T = NULL;
    gsl_rng                *r2 = NULL;
    double                  invtr, mlik, hmlik, blik, amlik, diff, ediff, liksi;
    int                     badsamp, nsamp, burnin = 100;
    Cds                    *avecds = cdsA->avecds;
    FILE                   *paramfile = NULL;
    double                 *liks = NULL;

    /* at least one element is requested so a zero-length request cannot
       be mistaken for an allocation failure */
    liks = malloc((size_t) (burn > 0 ? burn : 1) * sizeof(double));
    if (liks == NULL)
    {
        perror("\n  ERROR");
        fprintf(stderr,
                "\n  ERROR: could not allocate memory for %d log-likelihood samples. \n", burn);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    cdsA->stats->phi = cdsA->stats->hierarch_p1;
    /* the value is recomputed after sampling; the call still refreshes stats->logL */
    blik = CalcLogLGibbs(cdsA);

    gsl_rng_env_setup();
    gsl_rng_default_seed = time(NULL);
    T = gsl_rng_ranlxs2;
    r2 = gsl_rng_alloc(T);

    /* bounded write: rootname is of unknown length */
    snprintf(paramfname, sizeof(paramfname), "%s_%s.p", cdsA->algo->rootname, "gibbs");

    paramfile = fopen(paramfname, "w");
    if (paramfile == NULL)
    {
        perror("\n  ERROR");
        fprintf(stderr,
                "\n  ERROR99: could not open file '%s' for writing. \n", paramfname);
        PrintTheseusTag();
        exit(EXIT_FAILURE);
    }

    fprintf(paramfile, "%-12s %12d\n", "NUM", cnum);
    fprintf(paramfile, "%-12s %12d\n", "LEN", vlen);

    cdsA->stats->alpha = DBL_MAX;

    if (cdsA->algo->varweight > 0)
    {
        invtr = 0.0;
        for (i = 0; i < vlen; ++i)
        {
            invtr += 1.0 / cdsA->var[i];
            cdsA->w[i] = 1.0 / cdsA->var[i];
        }

        cdsA->stats->alpha = vlen / invtr;
        cdsA->stats->phi = vlen / invtr;
    }
    else
    {
        for (i = 0; i < vlen; ++i)
            cdsA->w[i] = 1.0 / cdsA->stats->var;
    }

    //////////////////////////////////////////
    //RandInitGibbs(cdsA, r2);

    CalcCdsPrincAxesGibbs(avecds, avecds->matrix, cdsA->w);
    Mat3TransposeIp(avecds->matrix);
    RotateCdsIp(avecds, (const double **) avecds->matrix);

    for (i = 0; i < cnum; ++i)
        RotateCdsIp(cdsA->cds[i], (const double **) avecds->matrix);

    VarCds(cdsA);
    printf("\nphi initial: % f", cdsA->stats->var);

    for (i = 1; i <= burn; ++i)
    {
        if (cdsA->algo->varweight > 0)
        {
            GibbsPhi(cdsA, r2);
            GibbsVarDiag(cdsA, r2);
            GibbsMeanDiag(cdsA, r2);
            GibbsTransDiag(cdsA, r2);
            //GibbsRot(cdsA, theta, r2);
            GibbsMetRot(cdsA, theta, r2);
            if (cdsA->algo->scale > 0)
                MetScaleDiag(cdsA, r2);
        }
        else
        {
            GibbsVar(cdsA, r2);
            GibbsMean(cdsA, r2);
            GibbsTrans(cdsA, r2);
            //GibbsRot(cdsA, theta, r2);
            GibbsMetRot(cdsA, theta, r2);
            if (cdsA->algo->scale > 0)
                MetScale(cdsA, r2);
        }

        liks[i-1] = CalcLogLGibbs(cdsA);

        if (i%100 == 0)
        {
            if (i%1000 == 0)
            {
                if (cdsA->algo->varweight > 0)
                    GibbsMeanDiagWrite(cdsA, r2, i);
                else
                    GibbsMeanWrite(cdsA, r2, i);
            }

            WriteSample(paramfile, cdsA, theta, i);
        }
    }

    printf("    Done with Gibbs-Metropolis ...\n");
    fflush(NULL);


////////////////////////////////////////////////////////////////////////////////////////////////////
    /* blik = mean log-likelihood after burn-in; it is subtracted before
       exponentiating to keep exp() in range */
    blik = 0.0;
    for (i = burnin; i < burn; ++i)
        blik += liks[i];

    nsamp = burn - burnin;
    blik /= nsamp;

    mlik = hmlik = amlik = 0.0;
    badsamp = 0;
    for (i = burnin; i < burn; ++i)
    {
        liksi = liks[i];
        diff = liksi - blik;
        ediff = exp(diff);

        if (isfinite(ediff))
        {
            mlik += ediff;
            hmlik += 1.0 / ediff;
            amlik += liksi;
        }
        else
        {
            ++badsamp;
        }
    }

    nsamp -= badsamp;

    printf("\n    Marginal likelihood: % 14.2f % 14.2f % 14.2f\n", 
           log(mlik / nsamp) + blik, blik - log(hmlik) + log(nsamp), amlik / nsamp);

    /* Newton and Raftery 1994
       Approximate Bayesian inference with the weighted likelihood bootstrap (with discussion). 
       Journal of the Royal Statistical Society, Series B, 56:3-48.
       Equation 16, p 22
       
       They suggest delta = 0.01, something "small".
       Curiously, delta = 0.5 results in the average posterior log likelihood 
       (which is different from the average posterior likelihood)
       I guess that can be thought of as the entropy of the posterior distribution */
    int cnt = 0;
    double term, fac, delta, num, denom, oldbf, bf = 0.0;
    delta = 0.5;
    term = delta * (burn-burnin) / (1.0 - delta);
    do
    {
        ++cnt;
        oldbf = bf;
        num = denom = 0.0;
        for (i = burnin; i < burn; ++i)
        {
            liksi = liks[i];
            diff = liksi - blik;
            ediff = exp(diff);
            fac = delta*bf + (1.0-delta)*ediff;
            num += ediff / fac;
            denom += 1.0 / fac;
        }
        
        bf = (term + num) / (term * bf + denom);
    }
    while (fabs(oldbf - bf) > bf * 1e-7 && cnt < 1000);
    printf("    Marginal likelihood2[%3d]: % 14.6f \n\n", cnt, log(bf) + blik);

////////////////////////////////////////////////////////////////////////////////////////////////////
    /* Diagnostic section for the scale sampler, hard-coded parameters. */

    double Cm, lik;
    double x,n,phi,gamma;

    /* The two dump files are optional diagnostics: if one cannot be opened
       a warning is printed and its output is skipped. */
    FILE *metfp = fopen("metropolis.txt", "w");
    if (metfp == NULL)
        perror("\n  WARNING: could not open 'metropolis.txt' for writing");

    FILE *rejfp = fopen("rejection.txt", "w");
    if (rejfp == NULL)
        perror("\n  WARNING: could not open 'rejection.txt' for writing");

    int samples = 100000;

    n=100;
    phi=100;
    gamma=100;

    double scalemax = ScaleMax(n,gamma,phi);
    printf("Mx: % e\n", scalemax);
    printf("FI: % e\n", 1.0 / (phi + (n-1.0)/(scalemax*scalemax)));
    printf("~Ex: % e\n", 1.0+(gamma/sqrt(phi)));
    printf("~Ex_dlt: % e\n", sqrt(n/phi));

    double sm = scalemax;
    double width = sqrt(1.0 / (phi + (n-1.0)/(sm*sm)));
    double xv = sm;
    int skip = 3;

    for (i = 0; i < samples; ++i)
    {
        /* the chain is always advanced so the generator state does not
           depend on whether the dump file could be opened */
        xv = scale_log_met3(n, gamma, phi, xv, r2, sm, width, skip);
        if (metfp != NULL)
            fprintf(metfp, "%e\n", xv);
    }

    /* rejection sampler comparison, currently disabled */
    if (0)
    {
        for (i = 0; i < samples; ++i)
        {
            if (rejfp != NULL)
                fprintf(rejfp, "%e\n", scale_rejection(n, gamma, phi, r2));
        }
    }

    if (metfp != NULL)
        fclose(metfp);
    if (rejfp != NULL)
        fclose(rejfp);

    /* density table, currently disabled */
    if (0)
    {
        Cm = CalcNormConstMm(n, gamma, phi);
        for (i=0;i<100;++i)
        {
            x=i*0.1;
            lik = CalcHalfNormChi(x, n, gamma, phi);

            printf("CLm[%3d]: %f % e\n", i, x, lik);
        }
    }

    Cm = CalcNormConstMm(n, gamma, phi);
    printf("Cm: % .18f\n", Cm);

////////////////////////////////////////////////////////////////////////////////////////////////////
    /* Numerical moments of f(): params[3] selects the power of x. */

    /* NOTE(review): this header is included inside the function body, as in
       the original; it belongs with the other includes at the top of the file. */
    #include <gsl/gsl_integration.h>
    gsl_integration_workspace      *w = gsl_integration_workspace_alloc(1000);
    double                          params[4];
    double                          result, error;
    double                          p, m1, m2, m3, m4, v, s, k, sd;

    params[0] = n;
    params[1] = gamma;
    params[2] = phi;

    gsl_function F;
    F.function = &f;
    F.params = &params[0];

    /* power 0: normalizing integral, p = 1/C */
    params[3] = 0.0;
    gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error);
    printf ("result (C)      = % .18e +/- % .18e\n", result, error);
    printf ("result (1/C)    = % .18e +/- % .18e\n", 1.0/result, error/(result*result));
    p = 1.0/result;

    params[3] = 1.0;
    gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error);
    printf ("result (m1)     = % .18f +/- % .18f\n", p*result, p*error);
    m1 = p*result;

    params[3] = 2.0;
    gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error);
    printf ("result (m2)     = % .18f +/- % .18f\n", p*result, p*error);
    m2 = p*result;

    /* third and fourth raw moments; the labels previously repeated m1/m2 */
    params[3] = 3.0;
    gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error);
    printf ("result (m3)     = % .18f +/- % .18f\n", p*result, p*error);
    m3 = p*result;

    params[3] = 4.0;
    gsl_integration_qagiu(&F, 0.0, 0.0, 1e-6, 1000, w, &result, &error);
    printf ("result (m4)     = % .18f +/- % .18f\n", p*result, p*error);
    m4 = p*result;

    v = m2 - m1*m1;
    sd = sqrt(v);
    s = (2.0*m1*m1*m1 - 3.0*m1*m2 + m3)/(sd*sd*sd);
    /* NOTE(review): this is 3 - mu4/sd^4, i.e. the negative of the usual
       excess kurtosis (mu4/sd^4 - 3) -- confirm the intended sign. */
    k = 3.0-(-3.0*m1*m1*m1*m1 + 6.0*m1*m1*m2 - 4.0*m1*m3 + m4)/(sd*sd*sd*sd);
    printf ("emp exp = % e\n", m1);
    printf ("emp var = % e\n", v);
    printf ("emp skw = % e\n", s);
    printf ("emp kur = % e\n", k);

////////////////////////////////////////////////////////////////////////////////////////////////////

    MatDestroy(&theta);
    fclose(paramfile);
    free(liks);
    gsl_integration_workspace_free(w);
    gsl_rng_free(r2);
}

