Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

VolCPotential.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 /***************************************************************************
00009  * RCS INFORMATION:
00010  *
00011  *      $RCSfile: VolCPotential.C,v $
00012  *      $Author: johns $        $Locker:  $             $State: Exp $
00013  *      $Revision: 1.30 $      $Date: 2019/01/17 21:21:02 $
00014  *
00015  ***************************************************************************/
00016 /*
00017  * Calculate a coulombic potential map
00018  */
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <math.h>
00022 #include <string.h>
00023 
00024 #include "config.h"         // force recompile when configuration changes
00025 #include "utilities.h"
00026 #include "Inform.h"
00027 #include "WKFThreads.h"
00028 #include "WKFUtils.h"
00029 #include "VolCPotential.h" 
00030 #if defined(VMDCUDA)
00031 #include "CUDAKernels.h"
00032 #endif
00033 #if defined(VMDOPENCL)
00034 #include "OpenCLKernels.h"
00035 #endif
00036 
00037 typedef struct {
00038   float* atoms;
00039   float* grideners;
00040   long int numplane;
00041   long int numcol;
00042   long int numpt;
00043   long int natoms;
00044   float gridspacing;
00045 } enthrparms;
00046 
00047 #if 1 || defined(__INTEL_COMPILER)
00048 
00049 #define FLOPSPERATOMEVAL 6.0
00050 extern "C" void * energythread(void *voidparms) {
00051   int threadid;
00052   enthrparms *parms = NULL;
00053   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00054   wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00055 
00056   /* 
00057    * copy in per-thread parameters 
00058    */
00059   const float *atoms = parms->atoms;
00060   float* grideners = parms->grideners;
00061   const long int numplane = parms->numplane;
00062   const long int numcol = parms->numcol;
00063   const long int numpt = parms->numpt;
00064   const long int natoms = parms->natoms;
00065   const float gridspacing = parms->gridspacing;
00066   int i, j, k, n;
00067   double lasttime, totaltime;
00068 
00069   /* Calculate the coulombic energy at each grid point from each atom
00070    * This is by far the most time consuming part of the process
00071    * We iterate over z,y,x, and then atoms
00072    */
00073 
00074   printf("thread %d started...\n", threadid);
00075   wkf_timerhandle timer = wkf_timer_create();
00076   wkf_timer_start(timer);
00077   wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00078 
00079   // Holds atom x, dy**2+dz**2, and atom q as x/r/q/x/r/q...
00080   float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00081   int maxn = natoms * 3;
00082 
00083   // For each point in the cube...
00084   wkf_tasktile_t tile;
00085   while (wkf_threadlaunch_next_tile(voidparms, 2, &tile) != WKF_SCHED_DONE) {
00086     for (k=tile.start; k<tile.end; k++) { 
00087       const float z = gridspacing * (float) k;
00088       lasttime = wkf_timer_timenow(timer);
00089       for (j=0; j<numcol; j++) {
00090         const float y = gridspacing * (float) j;
00091         long int voxaddr = numcol*numpt*k + numpt*j;
00092 
00093         // Prebuild a table of dy and dz values on a per atom basis
00094         for (n=0; n<natoms; n++) {
00095           int addr3 = n*3;
00096           int addr4 = n*4;
00097           float dy = y - atoms[addr4 + 1];
00098           float dz = z - atoms[addr4 + 2];
00099           xrq[addr3    ] = atoms[addr4];
00100           xrq[addr3 + 1] = dz*dz + dy*dy;
00101           xrq[addr3 + 2] = atoms[addr4 + 3];
00102         }
00103 
00104 // help the vectorizer make reasonable decisions
00105 #if defined(__INTEL_COMPILER)
00106 #pragma vector always 
00107 #endif
00108         /* walk through more than one grid point at a time */
00109         for (i=0; i<numpt; i+=8) {
00110           float energy1 = 0.0f;           /* Energy of first grid point */
00111           float energy2 = 0.0f;           /* Energy of second grid point */
00112           float energy3 = 0.0f;           /* Energy of third grid point */
00113           float energy4 = 0.0f;           /* Energy of fourth grid point */
00114           float energy5 = 0.0f;           /* Energy of fourth grid point */
00115           float energy6 = 0.0f;           /* Energy of fourth grid point */
00116           float energy7 = 0.0f;           /* Energy of fourth grid point */
00117           float energy8 = 0.0f;           /* Energy of fourth grid point */
00118 
00119           const float x = gridspacing * (float) i;
00120 
00121 // help the vectorizer make reasonable decisions
00122 #if defined(__INTEL_COMPILER)
00123 #pragma vector always 
00124 #endif
00125           /* Calculate the interaction with each atom */
00126           /* SSE allows simultaneous calculations of  */
00127           /* multiple iterations                      */
00128           /* 6 flops per grid point */
00129           for (n=0; n<maxn; n+=3) {
00130             float dy2pdz2 = xrq[n + 1];
00131             float q = xrq[n + 2];
00132   
00133             float dx1 = x - xrq[n];
00134             energy1 += q / sqrtf(dx1*dx1 + dy2pdz2);
00135   
00136             float dx2 = dx1 + gridspacing;
00137             energy2 += q / sqrtf(dx2*dx2 + dy2pdz2);
00138   
00139             float dx3 = dx2 + gridspacing;
00140             energy3 += q / sqrtf(dx3*dx3 + dy2pdz2);
00141   
00142             float dx4 = dx3 + gridspacing;
00143             energy4 += q / sqrtf(dx4*dx4 + dy2pdz2);
00144   
00145             float dx5 = dx4 + gridspacing;
00146             energy5 += q / sqrtf(dx5*dx5 + dy2pdz2);
00147   
00148             float dx6 = dx5 + gridspacing;
00149             energy6 += q / sqrtf(dx6*dx6 + dy2pdz2);
00150   
00151             float dx7 = dx6 + gridspacing;
00152             energy7 += q / sqrtf(dx7*dx7 + dy2pdz2);
00153   
00154             float dx8 = dx7 + gridspacing;
00155             energy8 += q / sqrtf(dx8*dx8 + dy2pdz2);
00156           }
00157 
00158           grideners[voxaddr + i] = energy1;
00159           if (i+1 < numpt)
00160             grideners[voxaddr + i + 1] = energy2;
00161           if (i+2 < numpt)
00162             grideners[voxaddr + i + 2] = energy3;
00163           if (i+3 < numpt)
00164             grideners[voxaddr + i + 3] = energy4;
00165           if (i+4 < numpt)
00166             grideners[voxaddr + i + 4] = energy5;
00167           if (i+5 < numpt)
00168             grideners[voxaddr + i + 5] = energy6;
00169           if (i+6 < numpt)
00170             grideners[voxaddr + i + 6] = energy7;
00171           if (i+7 < numpt)
00172             grideners[voxaddr + i + 7] = energy8;
00173         }
00174       }
00175       totaltime = wkf_timer_timenow(timer);
00176 
00177       if (wkf_msg_timer_timeout(msgt)) {
00178         // XXX: we have to use printf here as msgInfo is not thread-safe yet.
00179         printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00180                threadid, k, numplane,
00181                totaltime - lasttime, totaltime, 
00182                totaltime * numplane / (k+1));
00183       }
00184     }
00185   }
00186 
00187   wkf_timer_destroy(timer);
00188   wkf_msg_timer_destroy(msgt);
00189   free(xrq);
00190 
00191   return NULL;
00192 }
00193 
00194 #else
00195 
00196 #define FLOPSPERATOMEVAL 6.0
00197 extern "C" void * energythread(void *voidparms) {
00198   int threadid;
00199   enthrparms *parms = NULL;
00200   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00201   wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00202 
00203   /* 
00204    * copy in per-thread parameters 
00205    */
00206   const float *atoms = parms->atoms;
00207   float* grideners = parms->grideners;
00208   const long int numplane = parms->numplane;
00209   const long int numcol = parms->numcol;
00210   const long int numpt = parms->numpt;
00211   const long int natoms = parms->natoms;
00212   const float gridspacing = parms->gridspacing;
00213   int i, j, k, n;
00214   double lasttime, totaltime;
00215 
00216   /* Calculate the coulombic energy at each grid point from each atom
00217    * This is by far the most time consuming part of the process
00218    * We iterate over z,y,x, and then atoms
00219    */
00220 
00221   printf("thread %d started...\n", threadid);
00222   wkf_timerhandle timer = wkf_timer_create();
00223   wkf_timer_start(timer);
00224   wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00225 
00226   // Holds atom x, dy**2+dz**2, and atom q as x/r/q/x/r/q...
00227   float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00228   int maxn = natoms * 3;
00229 
00230   // For each point in the cube...
00231   while (!wkf_threadlaunch_next(voidparms, &k)) {
00232     const float z = gridspacing * (float) k;
00233     lasttime = wkf_timer_timenow(timer);
00234     for (j=0; j<numcol; j++) {
00235       const float y = gridspacing * (float) j;
00236       long int voxaddr = numcol*numpt*k + numpt*j;
00237 
00238       // Prebuild a table of dy and dz values on a per atom basis
00239       for (n=0; n<natoms; n++) {
00240         int addr3 = n*3;
00241         int addr4 = n*4;
00242         float dy = y - atoms[addr4 + 1];
00243         float dz = z - atoms[addr4 + 2];
00244         xrq[addr3    ] = atoms[addr4];
00245         xrq[addr3 + 1] = dz*dz + dy*dy;
00246         xrq[addr3 + 2] = atoms[addr4 + 3];
00247       }
00248 
00249 #if defined(__INTEL_COMPILER)
00250 // help the vectorizer make reasonable decisions (used prime to keep it honest)
00251 #pragma loop count(1009)
00252 #endif
00253       for (i=0; i<numpt; i++) {
00254         float energy = grideners[voxaddr + i]; // Energy at current grid point
00255         const float x = gridspacing * (float) i;
00256 
00257 #if defined(__INTEL_COMPILER)
00258 // help the vectorizer make reasonable decisions
00259 #pragma vector always 
00260 #endif
00261         // Calculate the interaction with each atom
00262         for (n=0; n<maxn; n+=3) {
00263           float dx = x - xrq[n];
00264           energy += xrq[n + 2] / sqrtf(dx*dx + xrq[n + 1]);
00265         }
00266         grideners[voxaddr + i] = energy;
00267       }
00268     }
00269     totaltime = wkf_timer_timenow(timer);
00270 
00271     if (wkf_msg_timer_timeout(msgt)) {
00272       // XXX: we have to use printf here as msgInfo is not thread-safe yet.
00273       printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00274              threadid, k, numplane,
00275              totaltime - lasttime, totaltime, 
00276              totaltime * numplane / (k+1));
00277     }
00278   }
00279 
00280   wkf_timer_destroy(timer);
00281   wkf_msg_timer_destroy(msgt);
00282   free(xrq);
00283 
00284   return NULL;
00285 }
00286 
00287 #endif
00288 
00289 
00290 static int vol_cpotential_cpu(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00291   int rc=0;
00292   enthrparms parms;
00293   wkf_timerhandle globaltimer;
00294   double totalruntime;
00295 
00296 #if defined(VMDTHREADS)
00297   int numprocs = wkf_thread_numprocessors();
00298 #else
00299   int numprocs = 1;
00300 #endif
00301 
00302   printf("Using %d %s\n", numprocs, ((numprocs > 1) ? "CPUs" : "CPU"));
00303 
00304   parms.atoms = atoms;
00305   parms.grideners = grideners;
00306   parms.numplane = numplane;
00307   parms.numcol = numcol;
00308   parms.numpt = numpt;
00309   parms.natoms = natoms;
00310   parms.gridspacing = gridspacing;
00311 
00312   globaltimer = wkf_timer_create();
00313   wkf_timer_start(globaltimer);
00314 
00315   /* spawn child threads to do the work */
00316   wkf_tasktile_t tile;
00317   tile.start=0;
00318   tile.end=numplane;
00319   rc = wkf_threadlaunch(numprocs, &parms, energythread, &tile);
00320 
00321   // Measure GFLOPS
00322   wkf_timer_stop(globaltimer);
00323   totalruntime = wkf_timer_time(globaltimer);
00324   wkf_timer_destroy(globaltimer);
00325 
00326   if (!rc) {
00327     double atomevalssec = ((double) numplane * numcol * numpt * natoms) / (totalruntime * 1000000000.0);
00328     printf("  %g billion atom evals/second, %g GFLOPS\n",
00329            atomevalssec, atomevalssec * FLOPSPERATOMEVAL);
00330   } else {
00331     msgWarn << "Encountered an unrecoverable error, calculation terminated." << sendmsg;
00332   }
00333 
00334   return rc;
00335 }
00336 
00337 
00338 int vol_cpotential(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00339   int rc = -1; // init rc value to indicate we haven't run yet
00340   wkf_timerhandle timer = wkf_timer_create();
00341   wkf_timer_start(timer);
00342 
00343 #if defined(VMDCUDA)
00344   if (!getenv("VMDNOCUDA")) {
00345     rc = vmd_cuda_vol_cpotential(natoms, atoms, grideners, 
00346                                  numplane, numcol, numpt, gridspacing);
00347   }
00348 #endif
00349 #if defined(VMDOPENCL)
00350   if ((rc != 0) && !getenv("VMDNOOPENCL")) {
00351     rc = vmd_opencl_vol_cpotential(natoms, atoms, grideners, 
00352                                    numplane, numcol, numpt, gridspacing);
00353   }
00354 #endif
00355 
00356   // if we tried to run on the GPU and failed, or we haven't run yet,
00357   // run on the CPU
00358   if (rc != 0)
00359     rc = vol_cpotential_cpu(natoms, atoms, grideners, 
00360                             numplane, numcol, numpt, gridspacing);
00361 
00362   double totaltime = wkf_timer_timenow(timer);
00363   wkf_timer_destroy(timer);
00364 
00365   msgInfo << "Coulombic potential map calculation complete: "
00366           << totaltime << " seconds" << sendmsg;
00367    
00368   return rc;
00369 }
00370 
00371 
00372 

Generated on Fri Nov 8 02:45:45 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002