00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <math.h>
00022 #include <string.h>
00023
00024 #include "config.h"
00025 #include "utilities.h"
00026 #include "Inform.h"
00027 #include "WKFThreads.h"
00028 #include "WKFUtils.h"
00029 #include "VolCPotential.h"
00030 #if defined(VMDCUDA)
00031 #include "CUDAKernels.h"
00032 #endif
00033 #if defined(VMDOPENCL)
00034 #include "OpenCLKernels.h"
00035 #endif
00036
00037 typedef struct {
00038 float* atoms;
00039 float* grideners;
00040 long int numplane;
00041 long int numcol;
00042 long int numpt;
00043 long int natoms;
00044 float gridspacing;
00045 } enthrparms;
00046
00047 #if 1 || defined(__INTEL_COMPILER)
00048
00049 #define FLOPSPERATOMEVAL 6.0
00050 extern "C" void * energythread(void *voidparms) {
00051 int threadid;
00052 enthrparms *parms = NULL;
00053 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00054 wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00055
00056
00057
00058
00059 const float *atoms = parms->atoms;
00060 float* grideners = parms->grideners;
00061 const long int numplane = parms->numplane;
00062 const long int numcol = parms->numcol;
00063 const long int numpt = parms->numpt;
00064 const long int natoms = parms->natoms;
00065 const float gridspacing = parms->gridspacing;
00066 int i, j, k, n;
00067 double lasttime, totaltime;
00068
00069
00070
00071
00072
00073
00074 printf("thread %d started...\n", threadid);
00075 wkf_timerhandle timer = wkf_timer_create();
00076 wkf_timer_start(timer);
00077 wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00078
00079
00080 float * xrq = (float *) malloc(3*natoms * sizeof(float));
00081 int maxn = natoms * 3;
00082
00083
00084 wkf_tasktile_t tile;
00085 while (wkf_threadlaunch_next_tile(voidparms, 2, &tile) != WKF_SCHED_DONE) {
00086 for (k=tile.start; k<tile.end; k++) {
00087 const float z = gridspacing * (float) k;
00088 lasttime = wkf_timer_timenow(timer);
00089 for (j=0; j<numcol; j++) {
00090 const float y = gridspacing * (float) j;
00091 long int voxaddr = numcol*numpt*k + numpt*j;
00092
00093
00094 for (n=0; n<natoms; n++) {
00095 int addr3 = n*3;
00096 int addr4 = n*4;
00097 float dy = y - atoms[addr4 + 1];
00098 float dz = z - atoms[addr4 + 2];
00099 xrq[addr3 ] = atoms[addr4];
00100 xrq[addr3 + 1] = dz*dz + dy*dy;
00101 xrq[addr3 + 2] = atoms[addr4 + 3];
00102 }
00103
00104
00105 #if defined(__INTEL_COMPILER)
00106 #pragma vector always
00107 #endif
00108
00109 for (i=0; i<numpt; i+=8) {
00110 float energy1 = 0.0f;
00111 float energy2 = 0.0f;
00112 float energy3 = 0.0f;
00113 float energy4 = 0.0f;
00114 float energy5 = 0.0f;
00115 float energy6 = 0.0f;
00116 float energy7 = 0.0f;
00117 float energy8 = 0.0f;
00118
00119 const float x = gridspacing * (float) i;
00120
00121
00122 #if defined(__INTEL_COMPILER)
00123 #pragma vector always
00124 #endif
00125
00126
00127
00128
00129 for (n=0; n<maxn; n+=3) {
00130 float dy2pdz2 = xrq[n + 1];
00131 float q = xrq[n + 2];
00132
00133 float dx1 = x - xrq[n];
00134 energy1 += q / sqrtf(dx1*dx1 + dy2pdz2);
00135
00136 float dx2 = dx1 + gridspacing;
00137 energy2 += q / sqrtf(dx2*dx2 + dy2pdz2);
00138
00139 float dx3 = dx2 + gridspacing;
00140 energy3 += q / sqrtf(dx3*dx3 + dy2pdz2);
00141
00142 float dx4 = dx3 + gridspacing;
00143 energy4 += q / sqrtf(dx4*dx4 + dy2pdz2);
00144
00145 float dx5 = dx4 + gridspacing;
00146 energy5 += q / sqrtf(dx5*dx5 + dy2pdz2);
00147
00148 float dx6 = dx5 + gridspacing;
00149 energy6 += q / sqrtf(dx6*dx6 + dy2pdz2);
00150
00151 float dx7 = dx6 + gridspacing;
00152 energy7 += q / sqrtf(dx7*dx7 + dy2pdz2);
00153
00154 float dx8 = dx7 + gridspacing;
00155 energy8 += q / sqrtf(dx8*dx8 + dy2pdz2);
00156 }
00157
00158 grideners[voxaddr + i] = energy1;
00159 if (i+1 < numpt)
00160 grideners[voxaddr + i + 1] = energy2;
00161 if (i+2 < numpt)
00162 grideners[voxaddr + i + 2] = energy3;
00163 if (i+3 < numpt)
00164 grideners[voxaddr + i + 3] = energy4;
00165 if (i+4 < numpt)
00166 grideners[voxaddr + i + 4] = energy5;
00167 if (i+5 < numpt)
00168 grideners[voxaddr + i + 5] = energy6;
00169 if (i+6 < numpt)
00170 grideners[voxaddr + i + 6] = energy7;
00171 if (i+7 < numpt)
00172 grideners[voxaddr + i + 7] = energy8;
00173 }
00174 }
00175 totaltime = wkf_timer_timenow(timer);
00176
00177 if (wkf_msg_timer_timeout(msgt)) {
00178
00179 printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00180 threadid, k, numplane,
00181 totaltime - lasttime, totaltime,
00182 totaltime * numplane / (k+1));
00183 }
00184 }
00185 }
00186
00187 wkf_timer_destroy(timer);
00188 wkf_msg_timer_destroy(msgt);
00189 free(xrq);
00190
00191 return NULL;
00192 }
00193
00194 #else
00195
00196 #define FLOPSPERATOMEVAL 6.0
00197 extern "C" void * energythread(void *voidparms) {
00198 int threadid;
00199 enthrparms *parms = NULL;
00200 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
00201 wkf_threadlaunch_getid(voidparms, &threadid, NULL);
00202
00203
00204
00205
00206 const float *atoms = parms->atoms;
00207 float* grideners = parms->grideners;
00208 const long int numplane = parms->numplane;
00209 const long int numcol = parms->numcol;
00210 const long int numpt = parms->numpt;
00211 const long int natoms = parms->natoms;
00212 const float gridspacing = parms->gridspacing;
00213 int i, j, k, n;
00214 double lasttime, totaltime;
00215
00216
00217
00218
00219
00220
00221 printf("thread %d started...\n", threadid);
00222 wkf_timerhandle timer = wkf_timer_create();
00223 wkf_timer_start(timer);
00224 wkfmsgtimer *msgt = wkf_msg_timer_create(5);
00225
00226
00227 float * xrq = (float *) malloc(3*natoms * sizeof(float));
00228 int maxn = natoms * 3;
00229
00230
00231 while (!wkf_threadlaunch_next(voidparms, &k)) {
00232 const float z = gridspacing * (float) k;
00233 lasttime = wkf_timer_timenow(timer);
00234 for (j=0; j<numcol; j++) {
00235 const float y = gridspacing * (float) j;
00236 long int voxaddr = numcol*numpt*k + numpt*j;
00237
00238
00239 for (n=0; n<natoms; n++) {
00240 int addr3 = n*3;
00241 int addr4 = n*4;
00242 float dy = y - atoms[addr4 + 1];
00243 float dz = z - atoms[addr4 + 2];
00244 xrq[addr3 ] = atoms[addr4];
00245 xrq[addr3 + 1] = dz*dz + dy*dy;
00246 xrq[addr3 + 2] = atoms[addr4 + 3];
00247 }
00248
00249 #if defined(__INTEL_COMPILER)
00250
00251 #pragma loop count(1009)
00252 #endif
00253 for (i=0; i<numpt; i++) {
00254 float energy = grideners[voxaddr + i];
00255 const float x = gridspacing * (float) i;
00256
00257 #if defined(__INTEL_COMPILER)
00258
00259 #pragma vector always
00260 #endif
00261
00262 for (n=0; n<maxn; n+=3) {
00263 float dx = x - xrq[n];
00264 energy += xrq[n + 2] / sqrtf(dx*dx + xrq[n + 1]);
00265 }
00266 grideners[voxaddr + i] = energy;
00267 }
00268 }
00269 totaltime = wkf_timer_timenow(timer);
00270
00271 if (wkf_msg_timer_timeout(msgt)) {
00272
00273 printf("thread[%d] plane %d/%ld time %.2f, elapsed %.1f, est. total: %.1f\n",
00274 threadid, k, numplane,
00275 totaltime - lasttime, totaltime,
00276 totaltime * numplane / (k+1));
00277 }
00278 }
00279
00280 wkf_timer_destroy(timer);
00281 wkf_msg_timer_destroy(msgt);
00282 free(xrq);
00283
00284 return NULL;
00285 }
00286
00287 #endif
00288
00289
00290 static int vol_cpotential_cpu(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00291 int rc=0;
00292 enthrparms parms;
00293 wkf_timerhandle globaltimer;
00294 double totalruntime;
00295
00296 #if defined(VMDTHREADS)
00297 int numprocs = wkf_thread_numprocessors();
00298 #else
00299 int numprocs = 1;
00300 #endif
00301
00302 printf("Using %d %s\n", numprocs, ((numprocs > 1) ? "CPUs" : "CPU"));
00303
00304 parms.atoms = atoms;
00305 parms.grideners = grideners;
00306 parms.numplane = numplane;
00307 parms.numcol = numcol;
00308 parms.numpt = numpt;
00309 parms.natoms = natoms;
00310 parms.gridspacing = gridspacing;
00311
00312 globaltimer = wkf_timer_create();
00313 wkf_timer_start(globaltimer);
00314
00315
00316 wkf_tasktile_t tile;
00317 tile.start=0;
00318 tile.end=numplane;
00319 rc = wkf_threadlaunch(numprocs, &parms, energythread, &tile);
00320
00321
00322 wkf_timer_stop(globaltimer);
00323 totalruntime = wkf_timer_time(globaltimer);
00324 wkf_timer_destroy(globaltimer);
00325
00326 if (!rc) {
00327 double atomevalssec = ((double) numplane * numcol * numpt * natoms) / (totalruntime * 1000000000.0);
00328 printf(" %g billion atom evals/second, %g GFLOPS\n",
00329 atomevalssec, atomevalssec * FLOPSPERATOMEVAL);
00330 } else {
00331 msgWarn << "Encountered an unrecoverable error, calculation terminated." << sendmsg;
00332 }
00333
00334 return rc;
00335 }
00336
00337
00338 int vol_cpotential(long int natoms, float* atoms, float* grideners, long int numplane, long int numcol, long int numpt, float gridspacing) {
00339 int rc = -1;
00340 wkf_timerhandle timer = wkf_timer_create();
00341 wkf_timer_start(timer);
00342
00343 #if defined(VMDCUDA)
00344 if (!getenv("VMDNOCUDA")) {
00345 rc = vmd_cuda_vol_cpotential(natoms, atoms, grideners,
00346 numplane, numcol, numpt, gridspacing);
00347 }
00348 #endif
00349 #if defined(VMDOPENCL)
00350 if ((rc != 0) && !getenv("VMDNOOPENCL")) {
00351 rc = vmd_opencl_vol_cpotential(natoms, atoms, grideners,
00352 numplane, numcol, numpt, gridspacing);
00353 }
00354 #endif
00355
00356
00357
00358 if (rc != 0)
00359 rc = vol_cpotential_cpu(natoms, atoms, grideners,
00360 numplane, numcol, numpt, gridspacing);
00361
00362 double totaltime = wkf_timer_timenow(timer);
00363 wkf_timer_destroy(timer);
00364
00365 msgInfo << "Coulombic potential map calculation complete: "
00366 << totaltime << " seconds" << sendmsg;
00367
00368 return rc;
00369 }
00370
00371
00372