msmpot_internal.h Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 2008-2009 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: msmpot_internal.h,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.5 $      $Date: 2011/01/14 16:09:28 $
00015  *
00016  ***************************************************************************/
00017 
00018 #ifndef MSMPOT_INTERNAL_H
00019 #define MSMPOT_INTERNAL_H
00020 
00021 #include <stdlib.h>
00022 #include <stdio.h>
00023 #include <string.h>
00024 #include <math.h>
00025 #include "msmpot.h"
00026 
00027 /* avoid parameter name collisions with AIX5 "hz" macro */
00028 #undef hz
00029 
00030 #ifdef __cplusplus
00031 extern "C" {
00032 #endif
00033 
00034 
00035   /* for 32-bit builds, IndexType is simply int */
00036   typedef int IndexType;
00037 
00038 
00039   /* create a 3D grid to access data of given TYPE */
00040 #undef GRID_TEMPLATE
00041 #define GRID_TEMPLATE(TYPE) \
00042   typedef struct TYPE##Grid_t { \
00043     TYPE *buffer;       /* raw buffer */ \
00044     TYPE *data;         /* data access offset from buffer */ \
00045     size_t numbytes;    /* number of bytes in use by buffer */ \
00046     size_t maxbytes;    /* actual allocation for buffer */ \
00047     int i0, j0, k0;     /* starting index value for each dimension */ \
00048     int ni, nj, nk;     /* number of elements in each dimension */ \
00049   } TYPE##Grid        /* expect closing ';' */
00050 
00051   /* initialize grid to empty */
00052 #define GRID_INIT(a) \
00053   ((a)->buffer=NULL, (a)->data=NULL, (a)->numbytes=0, (a)->maxbytes=0, \
00054    (a)->i0=0, (a)->j0=0, (a)->k0=0, (a)->ni=0, (a)->nj=0, (a)->nk=0)  /* ; */
00055 
00056   /* finished with grid, free its memory */
00057 #define GRID_DONE(a) \
00058   free((a)->buffer)  /* ; */
00059 
00060   /* determine the signed flattened index for 3D grid datum */
00061 #define GRID_INDEX(a, _i, _j, _k) \
00062   (((_k)*((a)->nj) + (_j))*(IndexType)((a)->ni) + (_i))  /* ; */
00063 
00064   /* obtain pointer to 3D grid datum */
00065 #define GRID_POINTER(a, _i, _j, _k) \
00066   ((a)->data + GRID_INDEX(a, _i, _j, _k))  /* ; */
00067 
00068   /* resize 3D grid buffer, setup its indexing */
00069   /* grab more memory when needed */
00070   /* (must be used within function returning int) */
00071 #define GRID_RESIZE(a, __i0, __ni, __j0, __nj, __k0, __nk) \
00072   do { \
00073     int _i0=(__i0), _ni=(__ni); \
00074     int _j0=(__j0), _nj=(__nj); \
00075     int _k0=(__k0), _nk=(__nk); \
00076     size_t _numbytes = (_nk * _nj) * (size_t) _ni * sizeof((a)->buffer[0]); \
00077     if ((a)->maxbytes < _numbytes) { \
00078       void *_t = realloc((a)->buffer, _numbytes); \
00079       if (NULL == _t) return ERROR(MSMPOT_ERROR_MALLOC); \
00080       (a)->buffer = (float *) _t; \
00081       (a)->maxbytes = _numbytes; \
00082     } \
00083     (a)->numbytes = _numbytes; \
00084     (a)->i0 = _i0, (a)->ni = _ni; \
00085     (a)->j0 = _j0, (a)->nj = _nj; \
00086     (a)->k0 = _k0, (a)->nk = _nk; \
00087     (a)->data = (a)->buffer + GRID_INDEX((a), -_i0, -_j0, -_k0); \
00088   } while (0)  /* expect closing ';' */
00089 
00090   /* reset 3D grid data to 0 */
00091 #define GRID_ZERO(a) \
00092   memset((a)->buffer, 0, (a)->numbytes)  /* ; */
00093 
00094   /* check 3D grid index range when debugging */
00095   /* (must be used within function returning int) */
00096 #ifdef MSMPOT_DEBUG
00097 #define GRID_INDEX_CHECK(a, _i, _j, _k) \
00098   do { \
00099     ASSERT((a)->i0 <= (_i) && (_i) < (a)->ni + (a)->i0); \
00100     ASSERT((a)->j0 <= (_j) && (_j) < (a)->nj + (a)->j0); \
00101     ASSERT((a)->k0 <= (_k) && (_k) < (a)->nk + (a)->k0); \
00102   } while (0)  /* expect closing ';' */
00103 #else
00104 #define GRID_INDEX_CHECK(a, _i, _j, _k)
00105 #endif
00106 
00107 
00108   /* default MSM parameters */
00109 #define DEFAULT_HMIN        2.f
00110 #define DEFAULT_CUTOFF     12.f
00111 #define DEFAULT_INTERP     MSMPOT_INTERP_CUBIC
00112 #define DEFAULT_SPLIT      MSMPOT_SPLIT_TAYLOR2
00113 
00114 #define DEFAULT_BINLENMAX   4.f
00115 
00116 #define DEFAULT_BINDEPTH    8      /* set for CUDA hardware */
00117 #define DEFAULT_BINFILL     0.75f  /* try to achieve average bin fill */
00118 #define DEFAULT_DENSITY     0.1f   /* for atom biomolecule (units 1/A^3) */
00119 
00120 #define DEFAULT_OVER       20
00121 
00122 #define DEFAULT_ERRTOL      5e-3   /* for (1/2)% relative error */
00123 
00124 #define ATOM_SIZE  4      /* number of floats per atom, stored x/y/z/q */
00125 
00126 #define ATOM_X(i)  ((i)<<2)      /* index for x coordinate of atom i */
00127 #define ATOM_Y(i)  (((i)<<2)+1)  /* index for y coordinate of atom i */
00128 #define ATOM_Z(i)  (((i)<<2)+2)  /* index for z coordinate of atom i */
00129 #define ATOM_Q(i)  (((i)<<2)+3)  /* index for q charge of atom i */
00130 
00131 #define SET_X(flag)       ((flag) |= 0x01)
00132 #define SET_Y(flag)       ((flag) |= 0x02)
00133 #define SET_Z(flag)       ((flag) |= 0x04)
00134 #define IS_SET_X(flag)    ((flag) & 0x01)
00135 #define IS_SET_Y(flag)    ((flag) & 0x02)
00136 #define IS_SET_Z(flag)    ((flag) & 0x04)
00137 #define IS_SET_ANY(flag)  ((flag) & 0x07)
00138 #define IS_SET_ALL(flag)  ((flag) == 0x07)
00139 
00140 
00141   GRID_TEMPLATE(float);   /* for MSM charge and potential grids */
00142 
00143 
00144 #ifdef MSMPOT_CUDA
00145   struct MsmpotCuda_t;    /* forward definition of MsmpotCuda structure */
00146   typedef struct MsmpotCuda_t MsmpotCuda;
00147 
00148   MsmpotCuda *Msmpot_cuda_create(void);
00149   void Msmpot_cuda_destroy(MsmpotCuda *);
00150   int Msmpot_cuda_setup(MsmpotCuda *, Msmpot *);
00151 
00152   int Msmpot_cuda_compute_shortrng(MsmpotCuda *);
00153 
00154   int Msmpot_cuda_compute_latcut(MsmpotCuda *);
00155   int Msmpot_cuda_condense_qgrids(MsmpotCuda *);
00156   int Msmpot_cuda_expand_egrids(MsmpotCuda *);
00157 #endif
00158 
00159 
00160   /*** Msmpot ****************************************************************/
00161 
00162   struct Msmpot_t {
00163     float *epotmap;       /* the map */
00164     int mx, my, mz;       /* map dimensions */
00165     float lx, ly, lz;     /* map lengths (rectangular box) */
00166     float lx0, ly0, lz0;  /* map origin */
00167     float dx, dy, dz;     /* map grid spacing, dx=lx/mx, etc. */
00168 
00169     const float *atom;    /* the original atom array stored x/y/z/q */
00170     int natoms;           /* number of atoms, atom array has 4*natoms floats */
00171 
00172     int isperiodic;       /* flag for periodicity in x, y, z */
00173     float px, py, pz;     /* atom domain lengths (whether or not periodic) */
00174     float px0, py0, pz0;  /* atom domain origin */
00175 
00176     float density;        /* expected density of system */
00177 
00178     float xmin, xmax;     /* max and min x-coordinates of atoms */
00179     float ymin, ymax;     /* max and min y-coordinates of atoms */
00180     float zmin, zmax;     /* max and min z-coordinates of atoms */
00181 
00182     /*
00183      * Short-range part:  spatial hashing of atoms into bins,
00184      * calculate short-range contribution to each grid point as
00185      * the sum of surrounding neighborhood of bins.
00186      *
00187      * Bins are accesed in a grid starting at 0:
00188      *   bin(i,j,k) == bin[ (k*nby + j)*nbx + i ]
00189      *
00190      * The bincount array corresponds to the bin array giving
00191      * number of atoms stored in each bin.
00192      *
00193      * The bins cover the atom domain starting at (px0,py0,pz0).
00194      *
00195      * The bin size is chosen to achieve a BINFILL*bindepth average
00196      * fill rate per bin.
00197      */
00198 
00199     float *bin;           /* length 4*bindepth*maxbins */
00200     int *bincount;        /* number of atoms in each bin */
00201     int bindepth;         /* number of atom slots per bin (from GPU hardware) */
00202     int nbx, nby, nbz;    /* number of bins in grid along x, y, z */
00203     int maxbin;           /* maximum allocation of bins >= nbx*nby*nbz */
00204     int isbinwrap;        /* flag for bin neighborhood wrapping in x, y, z */
00205     int islongcutoff;     /* flag for cutoff wrapping beyond nearest image */
00206     float binfill;        /* set bin size for this fill ratio (from user) */
00207     float bx, by, bz;     /* bx = px/nbx, by = py/nby, bz = py/nbz */
00208     float invbx, invby, invbz;  /* inverse lengths */
00209 
00210     float *over;          /* bin overflow list, length 4*maxoverflow */
00211     int nover;            /* how many atoms in overflow list */
00212     int maxover;          /* maximum allocation for list */
00213 
00214     int *boff;            /* neighborhood as bin index offsets */
00215     int nboff;            /* number of offsets, length is 3*nbdoff */
00216     int maxboff;          /* maximum allocation */
00217 
00218     /*
00219      * Fundamental MSM parameters:
00220      *
00221      * Default grid spacing hmin = 2A, cutoff a = 12A,
00222      * C1 cubic interpolation, C2 Taylor splitting,
00223      * all gives reasonable accuracy for atomistic systems.
00224      *
00225      * Find grid spacings in range hmin <= hx, hy, hz <= (1.5 * hmin)
00226      * such that along periodic dimensions the number of grid points is
00227      * some power of 2 times zero or one power of 3.
00228      *
00229      * Maintain ratio 4 <= a/hx, a/hy, a/hz <= 6 for accuracy,
00230      * and constants can fit into GPU.
00231      *
00232      *
00233      * for nonperiodic dimensions, the finest level lattice is chosen to be
00234      * smallest size aligned with epotmap containing both atoms and epotmap;
00235      * for periodic dimensions, the finest level lattice fits within cell
00236      * defined by epotmap parameters above;
00237      * the number of levels is determined to reduce coarsest level lattice
00238      * to be as small as possible for the given boundary conditions
00239      */
00240     float errtol;         /* error tolerance for convergence of "exact" PBC */
00241 
00242     float hmin;           /* smallest MSM grid spacing, hmax = 1.5 * hmin */
00243     float hx, hy, hz;     /* the finest level lattice spacings */
00244     float a;              /* the MSM cutoff between short- and long-range */
00245     int nx, ny, nz;       /* count number of h spacings that cover domain */
00246 
00247     int interp;           /* the interpolant MSMPOT_INTERP_ */
00248     int split;            /* the splitting MSMPOT_SPLIT_ */
00249 
00250     int nlevels;          /* number of lattice levels */
00251 
00252     /*
00253      * Grids for calculating long-range part:
00254      * q[0] is finest-spaced grid (hx, hy, hz),
00255      * grid level k has spacing 2^k * (hx, hy, hz).
00256      *
00257      * Use domain origin (px0, py0, pz0) for each grid origin, ao that
00258      * q[0](i,j,k) has position (i*hx + px0, j*hy + py0, k*hz + pz0)
00259      * the finest level lattice is 0, e.g. q0 = qh[0].
00260      *
00261      * Grid indexes can be negative for non-periodic dimensions,
00262      * the periodic dimensions wrap around edges of grid.
00263      */
00264 
00265     floatGrid *qh;        /* grids of charge, 0==finest */
00266     floatGrid *eh;        /* grids of potential, 0==finest */
00267     floatGrid *gc;        /* fixed-size grids of weights for convolution */
00268     int maxlevels;        /* maximum number of grid levels allocated */
00269 
00270 
00271 
00272     /* Interpolating from finest lattice to epotmap:
00273      * want ratio hx/dx to be rational 2^(px2)*3^(px3),
00274      * where px2 is unrestricted and px3=0 or px3=1.
00275      * The interpolation of epotmap from finest lattice then has
00276      * a fixed cycle of coefficients that can be precomputed.
00277      * The calculation steps through MSM lattice points and
00278      * adds their contribution to surrounding epotmap points. */
00279 
00280     int px2, py2, pz2;    /* powers of 2 */
00281     int px3, py3, pz3;    /* powers of 3 */
00282     float hx_dx, hy_dy, hz_dz;  /* scaling is integer for px2 >= 0 */
00283 
00284     int cycle_x, cycle_y, cycle_z;  /* counts MSM points between alignment */
00285     int rmap_x, rmap_y, rmap_z;     /* radius of map points about MSM point */
00286 
00287     int max_phi_x, max_phi_y, max_phi_z;  /* alloc length of phi arrays */
00288     float *phi_x;         /* coefficients, size cycle_x * (2*rmap_x + 1) */
00289     float *phi_y;         /* coefficients, size cycle_y * (2*rmap_y + 1) */
00290     float *phi_z;         /* coefficients, size cycle_z * (2*rmap_z + 1) */
00291 
00292 
00293 
00294     /* need these */
00295 
00296     int max_ezd, max_eyzd;  /* alloc length of interp temp buffer space */
00297     float *ezd;           /* interpolation temp row buffer, length mz */
00298     float *eyzd;          /* interpolation temp plane buffer, length my*mz */
00299 
00300     int max_lzd, max_lyzd;  /* alloc length of factor temp buffer space */
00301     float *lzd;           /* factor temp row buffer, length z-dim of h-level */
00302     float *lyzd;          /* factor temp row buffer, (y*z)-dim of h-level */
00303 
00304 
00305     /* cursor linked list implementation */
00306     int maxatoms;                      /* allocated number of atoms */
00307     int *first_atom_index;             /* length maxcells >= ncells */
00308     int *next_atom_index;              /* length maxatoms >= natoms */
00309 
00310 #ifdef MSMPOT_CUDA
00311     MsmpotCuda *msmcuda;    /* handle to "MsmpotCuda" (CUDA-compute) object */
00312     const int *devlist;     /* list of devices, prioritized highest to lowest */
00313     int devlistlen;         /* length of devlist */
00314     int cuda_optional;      /* flag CUDA is optional, fall back on CPU */
00315     int use_cuda;           /* flag use of CUDA */
00316     int use_cuda_shortrng;  /* flag use of CUDA for short-range part */
00317     int use_cuda_latcut;    /* flag use of CUDA for lattice cutoff part */
00318 #endif
00319   };
00320 
00321 
00322   /* for internal use only */
00323   void Msmpot_set_defaults(Msmpot *msm);
00324   int Msmpot_check_params(Msmpot *msm, const float *epotmap,
00325       int mx, int my, int mz, float lx, float ly, float lz,
00326       float vx, float vy, float vz, const float *atom, int natoms);
00327 
00328   int Msmpot_setup(Msmpot *msm);
00329   void Msmpot_cleanup(Msmpot *msm);
00330 
00331   /*
00332    * CPU implementation: hash atoms into bins,
00333    * evaluate grid points over neighborhood of bins
00334    */
00335   int Msmpot_compute_shortrng_bins(Msmpot *msm);
00336 
00337   /*
00338    * Determine the bin neighborhood for a given "region":
00339    * - for CPU, just give the bin lengths
00340    * - for GPU, give the region sizes, e.g., REGSIZE_X * dx
00341    */
00342   int Msmpot_compute_shortrng_bin_neighborhood(Msmpot *msm,
00343       float rx,  /* region length in x-dimension */
00344       float ry,  /* region length in y-dimension */
00345       float rz   /* region length in z-dimension */
00346       );
00347 
00348   /*
00349    * Perform the spatial hashing of atoms into bins,
00350    * place any extra atoms that overflow bins into overflow list,
00351    * can use for both GPU and CPU.
00352    */
00353   int Msmpot_compute_shortrng_bin_hashing(Msmpot *msm);
00354 
00355   /*
00356    * Use linklist data structure for spatial hashing of atoms:
00357    * - for CPU, when using by itself, give entire atom list
00358    * - for GPU or CPU, when using with bin hashing, give overflow atom list
00359    */
00360   int Msmpot_compute_shortrng_linklist(Msmpot *msm,
00361       const float *atom,    /* array of atoms stored x/y/z/q */
00362       int natoms            /* number of atoms in array */
00363       );
00364 
00365   int Msmpot_compute_longrng(Msmpot *msm);
00366 
00367   int Msmpot_compute_longrng_cubic(Msmpot *msm);
00368 
00369 
00370   /* exception handling:
00371    * MSMPOT_DEBUG turns on error reporting to stderr stream, 
00372    * in any case propagate error number back up the call stack */
00373 #undef  ERROR
00374 #ifndef MSMPOT_DEBUG
00375 #define ERROR(err)       (err)
00376 #define ERRMSG(err,msg)  (err)
00377 #else
00378   /* report error to stderr stream, return error code "err" */
00379   int Msmpot_report_error(int err, const char *msg, const char *fn, int ln);
00380 #define ERROR(err)       Msmpot_report_error(err, NULL, __FILE__, __LINE__)
00381 #define ERRMSG(err,msg)  Msmpot_report_error(err, msg, __FILE__, __LINE__)
00382 #endif
00383 
00384 
00385   /* check assertions when debugging, raise exception if failure */
00386 #ifndef MSMPOT_DEBUG
00387 #define ASSERT(expr)
00388 #else
00389 #define ASSERT(expr) \
00390   do { \
00391     if ( !(expr) ) { \
00392       return Msmpot_report_error(MSMPOT_ERROR_ASSERT, \
00393                                  #expr, __FILE__, __LINE__); \
00394     } \
00395   } while (0)
00396 #endif
00397 
00398 
00399   /* MSMPOT_VERBOSE enables MSMPOT_REPORT */
00400 #ifdef MSMPOT_VERBOSE
00401 #undef MSMPOT_REPORT
00402 #define MSMPOT_REPORT
00403 #endif
00404 
00405   /* report status of MSMPOT calculation */
00406 #ifndef MSMPOT_REPORT
00407 #define REPORT(msg)
00408 #else
00409 #define REPORT(msg)  printf("MSMPOT: %s\n", (msg))
00410 #endif
00411 
00412 
00413   /* SPOLY() calculates the polynomial part of the
00414    * normalized smoothing of 1/r, i.e. g_1((r/a)**2).
00415    *
00416    *   pg - float*, points to variable to receive the result
00417    *   s - (ra/)**2, assumed to be between 0 and 1
00418    *   split - identify the type of smoothing used to split the potential */
00419 #undef  SPOLY
00420 #define SPOLY(pg, s, split) \
00421   do { \
00422     float _s = s;  /* where s=(r/a)**2 */ \
00423     float _g = 0; \
00424     ASSERT(0 <= _s && _s <= 1); \
00425     switch (split) { \
00426       case MSMPOT_SPLIT_TAYLOR2: \
00427         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8)); \
00428         break; \
00429       case MSMPOT_SPLIT_TAYLOR3: \
00430         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16))); \
00431         break; \
00432       case MSMPOT_SPLIT_TAYLOR4: \
00433         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
00434                 + (_s-1)*(35.f/128)))); \
00435         break; \
00436       default: \
00437         return ERRMSG(MSMPOT_ERROR_SUPPORT, \
00438             "splitting function not implemented"); \
00439     } \
00440     *(pg) = _g; \
00441   } while (0)
00442   /* closing ';' from use as function call */
00443 
00444 
00445 #ifdef __cplusplus
00446 }
00447 #endif
00448 
00449 #endif /* MSMPOT_INTERNAL_H */