00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <math.h>
00022 #include <string.h>
00023 
00024 #include "config.h"         
00025 #include "utilities.h"
00026 #include "Inform.h"
00027 #include "WKFThreads.h"
00028 #include "WKFUtils.h"
00029 #include "VolCPotential.h" 
00030 #if defined(VMDCUDA)
00031 #include "CUDAKernels.h"
00032 #endif
00033 #if defined(VMDOPENCL)
00034 #include "OpenCLKernels.h"
00035 #endif
00036 
00037 typedef struct {
00038   float* atoms;
00039   float* grideners;
00040   long int numplane;
00041   long int numcol;
00042   long int numpt;
00043   long int natoms;
00044   float gridspacing;
00045 } enthrparms;
00046 
00047 #if 1 || defined(__INTEL_COMPILER)
00048 
00049 #define FLOPSPERATOMEVAL 6.0
00050 extern "C" void * energythread(void *voidparms) {
00051   int threadid;
00052   enthrparms *parms = NULL;
00053   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00054   wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00055 
00056   
00057 
00058 
00059   const float *atoms = parms->atoms;
00060   float* grideners = parms->grideners;
00061   const long int numplane = parms->numplane;
00062   const long int numcol = parms->numcol;
00063   const long int numpt = parms->numpt;
00064   const long int natoms = parms->natoms;
00065   const float gridspacing = parms->gridspacing;
00066   int i, j, k, n;
00067   double lasttime, totaltime;
00068 
00069   
00070 
00071 
00072 
00073 
00074   printf("thread %d started...\n", threadid);
00075   wkf_timerhandle timer = wkf_timer_create();
00076   wkf_timer_start(timer);
00077   wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00078 
00079   
00080   float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00081   int maxn = natoms * 3;
00082 
00083   
00084   wkf_tasktile_t tile;
00085   while (wkf_threadlaunch_next_tile(voidparms, 2, &tile) != WKF_SCHED_DONE) {
00086     for (k=tile.start; k<tile.end; k++) { 
00087       const float z = gridspacing * (float) k;
00088       lasttime = wkf_timer_timenow(timer);
00089       for (j=0; j<numcol; j++) {
00090         const float y = gridspacing * (float) j;
00091         long int voxaddr = numcol*numpt*k + numpt*j;
00092 
00093         
00094         for (n=0; n<natoms; n++) {
00095           int addr3 = n*3;
00096           int addr4 = n*4;
00097           float dy = y - atoms[addr4 + 1];
00098           float dz = z - atoms[addr4 + 2];
00099           xrq[addr3    ] = atoms[addr4];
00100           xrq[addr3 + 1] = dz*dz + dy*dy;
00101           xrq[addr3 + 2] = atoms[addr4 + 3];
00102         }
00103 
00104 
00105 #if defined(__INTEL_COMPILER)
00106 #pragma vector always 
00107 #endif
00108         
00109         for (i=0; i<numpt; i+=8) {
00110           float energy1 = 0.0f;           
00111           float energy2 = 0.0f;           
00112           float energy3 = 0.0f;           
00113           float energy4 = 0.0f;           
00114           float energy5 = 0.0f;           
00115           float energy6 = 0.0f;           
00116           float energy7 = 0.0f;           
00117           float energy8 = 0.0f;           
00118 
00119           const float x = gridspacing * (float) i;
00120 
00121 
00122 #if defined(__INTEL_COMPILER)
00123 #pragma vector always 
00124 #endif
00125           
00126           
00127           
00128           
00129           for (n=0; n<maxn; n+=3) {
00130             float dy2pdz2 = xrq[n + 1];
00131             float q = xrq[n + 2];
00132   
00133             float dx1 = x - xrq[n];
00134             energy1 += q / sqrtf(dx1*dx1 + dy2pdz2);
00135   
00136             float dx2 = dx1 + gridspacing;
00137             energy2 += q / sqrtf(dx2*dx2 + dy2pdz2);
00138   
00139             float dx3 = dx2 + gridspacing;
00140             energy3 += q / sqrtf(dx3*dx3 + dy2pdz2);
00141   
00142             float dx4 = dx3 + gridspacing;
00143             energy4 += q / sqrtf(dx4*dx4 + dy2pdz2);
00144   
00145             float dx5 = dx4 + gridspacing;
00146             energy5 += q / sqrtf(dx5*dx5 + dy2pdz2);
00147   
00148             float dx6 = dx5 + gridspacing;
00149             energy6 += q / sqrtf(dx6*dx6 + dy2pdz2);
00150   
00151             float dx7 = dx6 + gridspacing;
00152             energy7 += q / sqrtf(dx7*dx7 + dy2pdz2);
00153   
00154             float dx8 = dx7 + gridspacing;
00155             energy8 += q / sqrtf(dx8*dx8 + dy2pdz2);
00156           }
00157 
00158           grideners[voxaddr + i] = energy1;
00159           if (i+1 < numpt)
00160             grideners[voxaddr + i + 1] = energy2;
00161           if (i+2 < numpt)
00162             grideners[voxaddr + i + 2] = energy3;
00163           if (i+3 < numpt)
00164             grideners[voxaddr + i + 3] = energy4;
00165           if (i+4 < numpt)
00166             grideners[voxaddr + i + 4] = energy5;
00167           if (i+5 < numpt)
00168             grideners[voxaddr + i + 5] = energy6;
00169           if (i+6 < numpt)
00170             grideners[voxaddr + i + 6] = energy7;
00171           if (i+7 < numpt)
00172             grideners[voxaddr + i + 7] = energy8;
00173         }
00174       }
00175       totaltime = wkf_timer_timenow(timer);
00176 
00177       if (wkf_msg_timer_timeout(msgt)) {
00178         
00179         printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00180                threadid, k, numplane,
00181                totaltime - lasttime, totaltime, 
00182                totaltime * numplane / (k+1));
00183       }
00184     }
00185   }
00186 
00187   wkf_timer_destroy(timer);
00188   wkf_msg_timer_destroy(msgt);
00189   free(xrq);
00190 
00191   return NULL;
00192 }
00193 
00194 #else
00195 
00196 #define FLOPSPERATOMEVAL 6.0
00197 extern "C" void * energythread(void *voidparms) {
00198   int threadid;
00199   enthrparms *parms = NULL;
00200   wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00201   wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00202 
00203   
00204 
00205 
00206   const float *atoms = parms->atoms;
00207   float* grideners = parms->grideners;
00208   const long int numplane = parms->numplane;
00209   const long int numcol = parms->numcol;
00210   const long int numpt = parms->numpt;
00211   const long int natoms = parms->natoms;
00212   const float gridspacing = parms->gridspacing;
00213   int i, j, k, n;
00214   double lasttime, totaltime;
00215 
00216   
00217 
00218 
00219 
00220 
00221   printf("thread %d started...\n", threadid);
00222   wkf_timerhandle timer = wkf_timer_create();
00223   wkf_timer_start(timer);
00224   wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00225 
00226   
00227   float * xrq = (float *) malloc(3*natoms * sizeof(float)); 
00228   int maxn = natoms * 3;
00229 
00230   
00231   while (!wkf_threadlaunch_next(voidparms, &k)) {
00232     const float z = gridspacing * (float) k;
00233     lasttime = wkf_timer_timenow(timer);
00234     for (j=0; j<numcol; j++) {
00235       const float y = gridspacing * (float) j;
00236       long int voxaddr = numcol*numpt*k + numpt*j;
00237 
00238       
00239       for (n=0; n<natoms; n++) {
00240         int addr3 = n*3;
00241         int addr4 = n*4;
00242         float dy = y - atoms[addr4 + 1];
00243         float dz = z - atoms[addr4 + 2];
00244         xrq[addr3    ] = atoms[addr4];
00245         xrq[addr3 + 1] = dz*dz + dy*dy;
00246         xrq[addr3 + 2] = atoms[addr4 + 3];
00247       }
00248 
00249 #if defined(__INTEL_COMPILER)
00250 
00251 #pragma loop count(1009)
00252 #endif
00253       for (i=0; i<numpt; i++) {
00254         float energy = grideners[voxaddr + i]; 
00255         const float x = gridspacing * (float) i;
00256 
00257 #if defined(__INTEL_COMPILER)
00258 
00259 #pragma vector always 
00260 #endif
00261         
00262         for (n=0; n<maxn; n+=3) {
00263           float dx = x - xrq[n];
00264           energy += xrq[n + 2] / sqrtf(dx*dx + xrq[n + 1]);
00265         }
00266         grideners[voxaddr + i] = energy;
00267       }
00268     }
00269     totaltime = wkf_timer_timenow(timer);
00270 
00271     if (wkf_msg_timer_timeout(msgt)) {
00272       
00273       printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00274              threadid, k, numplane,
00275              totaltime - lasttime, totaltime, 
00276              totaltime * numplane / (k+1));
00277     }
00278   }
00279 
00280   wkf_timer_destroy(timer);
00281   wkf_msg_timer_destroy(msgt);
00282   free(xrq);
00283 
00284   return NULL;
00285 }
00286 
00287 #endif
00288 
00289 
00290 static int vol_cpotential_cpu(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00291   int rc=0;
00292   enthrparms parms;
00293   wkf_timerhandle globaltimer;
00294   double totalruntime;
00295 
00296 #if defined(VMDTHREADS)
00297   int numprocs = wkf_thread_numprocessors();
00298 #else
00299   int numprocs = 1;
00300 #endif
00301 
00302   printf("Using %d %s\n", numprocs, ((numprocs > 1) ? "CPUs" : "CPU"));
00303 
00304   parms.atoms = atoms;
00305   parms.grideners = grideners;
00306   parms.numplane = numplane;
00307   parms.numcol = numcol;
00308   parms.numpt = numpt;
00309   parms.natoms = natoms;
00310   parms.gridspacing = gridspacing;
00311 
00312   globaltimer = wkf_timer_create();
00313   wkf_timer_start(globaltimer);
00314 
00315   
00316   wkf_tasktile_t tile;
00317   tile.start=0;
00318   tile.end=numplane;
00319   rc = wkf_threadlaunch(numprocs, &parms, energythread, &tile);
00320 
00321   
00322   wkf_timer_stop(globaltimer);
00323   totalruntime = wkf_timer_time(globaltimer);
00324   wkf_timer_destroy(globaltimer);
00325 
00326   if (!rc) {
00327     double atomevalssec = ((double) numplane * numcol * numpt * natoms) / (totalruntime * 1000000000.0);
00328     printf("  %g billion atom evals/second, %g GFLOPS\n",
00329            atomevalssec, atomevalssec * FLOPSPERATOMEVAL);
00330   } else {
00331     msgWarn << "Encountered an unrecoverable error, calculation terminated." << sendmsg;
00332   }
00333 
00334   return rc;
00335 }
00336 
00337 
00338 int vol_cpotential(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00339   int rc = -1; 
00340   wkf_timerhandle timer = wkf_timer_create();
00341   wkf_timer_start(timer);
00342 
00343 #if defined(VMDCUDA)
00344   if (!getenv("VMDNOCUDA")) {
00345     rc = vmd_cuda_vol_cpotential(natoms, atoms, grideners, 
00346                                  numplane, numcol, numpt, gridspacing);
00347   }
00348 #endif
00349 #if defined(VMDOPENCL)
00350   if ((rc != 0) && !getenv("VMDNOOPENCL")) {
00351     rc = vmd_opencl_vol_cpotential(natoms, atoms, grideners, 
00352                                    numplane, numcol, numpt, gridspacing);
00353   }
00354 #endif
00355 
00356   
00357   
00358   if (rc != 0)
00359     rc = vol_cpotential_cpu(natoms, atoms, grideners, 
00360                             numplane, numcol, numpt, gridspacing);
00361 
00362   double totaltime = wkf_timer_timenow(timer);
00363   wkf_timer_destroy(timer);
00364 
00365   msgInfo << "Coulombic potential map calculation complete: "
00366           << totaltime << " seconds" << sendmsg;
00367    
00368   return rc;
00369 }
00370 
00371 
00372